6817 lines
977 KiB
Plaintext
6817 lines
977 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Version 1 Simulation\n",
|
|
"\n",
|
|
"The first version of this series is a basic control model. Given an elevation profile $H(x)$ and a time target, minimize energy usage.\n",
|
|
"We assume the time target is fixed, since we are racing at a given overall pace. In other words, the average speed is known in advance: $E[V] = \\text{distance} / \\text{time}$."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import jax.numpy as jnp\n",
|
|
"from jax import jit, vmap, lax\n",
|
|
"from jax import random\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"\n",
|
|
"@jit\n",
|
|
"def _cov_math(t, s, H):\n",
|
|
"    \"\"\"Fractional Brownian motion covariance between times t and s.\n",
|
|
"\n",
|
|
"    Evaluates 0.5 * (|t|^(2H) + |s|^(2H) - |t - s|^(2H)) elementwise,\n",
|
|
"    where H is the Hurst exponent.\n",
|
|
"    \"\"\"\n",
|
|
"    # The exponent is 2H. Writing `** 2 * H` squares first and then multiplies\n",
|
|
"    # by H, which collapses the whole expression to H * t * s.\n",
|
|
"    two_h = 2 * H\n",
|
|
"    return 0.5 * (jnp.abs(t) ** two_h + jnp.abs(s) ** two_h - jnp.abs(t - s) ** two_h)\n",
|
|
"\n",
|
|
"\n",
|
|
"def _fbm_covariance(n, H) -> jnp.ndarray:\n",
|
|
"    \"\"\"Build the (n, n) covariance matrix of fractional Brownian motion.\n",
|
|
"\n",
|
|
"    Entry (i, j) is 0.5 * (|t_i|^(2H) + |t_j|^(2H) - |t_i - t_j|^(2H)) on the\n",
|
|
"    integer time grid 1..n, where H is the Hurst exponent.\n",
|
|
"    \"\"\"\n",
|
|
"    tidx = jnp.arange(1, n + 1)\n",
|
|
"    t, s = jnp.meshgrid(tidx, tidx)\n",
|
|
"\n",
|
|
"    # fBm covariance equation from wikipedia. The exponent is 2H: `** 2 * H`\n",
|
|
"    # squares first and multiplies by H, which reduces the matrix to the\n",
|
|
"    # rank-one H * t * s; that matrix is singular, so it has no usable\n",
|
|
"    # Cholesky factor.\n",
|
|
"    two_h = 2 * H\n",
|
|
"    cov = 0.5 * (jnp.abs(t) ** two_h + jnp.abs(s) ** two_h - jnp.abs(t - s) ** two_h)\n",
|
|
"    return cov\n",
|
|
"\n",
|
|
"# generate terrain using fractional brownian motion\n",
|
|
"def gen_elevation_profile(rngkey: random.PRNGKey, n_steps: int, H: float, n_samples: int = 1):\n",
|
|
"    \"\"\"Sample fractional-Brownian-motion elevation paths.\n",
|
|
"\n",
|
|
"    Args:\n",
|
|
"        rngkey: JAX PRNG key used to draw the Gaussian noise.\n",
|
|
"        n_steps: number of points along each path.\n",
|
|
"        H: Hurst exponent, forwarded to _fbm_covariance.\n",
|
|
"        n_samples: number of independent paths to draw.\n",
|
|
"\n",
|
|
"    Returns:\n",
|
|
"        (t, fbm_samples): t holds n_steps evenly spaced values in [0, 1];\n",
|
|
"        fbm_samples has shape (n_samples, n_steps), one path per row.\n",
|
|
"    \"\"\"\n",
|
|
"    t = jnp.linspace(0, 1, n_steps)\n",
|
|
"    cov = _fbm_covariance(n_steps, H)\n",
|
|
"    # using the \"method 1\" (cholesky decomposition)\n",
|
|
"    sigma = jnp.linalg.cholesky(cov)\n",
|
|
"    # one column of n_steps gaussian normal values per requested path\n",
|
|
"    v = random.normal(rngkey, shape=(n_steps, n_samples))\n",
|
|
"    # convert these to fbm lines: the noise is correlated with a matrix\n",
|
|
"    # product (an elementwise `sigma * v` only broadcasts v across sigma's rows)\n",
|
|
"    fbm_samples = (sigma @ v).T\n",
|
|
"\n",
|
|
"    return t, fbm_samples\n",
|
|
"\n",
|
|
"\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"An NVIDIA GPU may be present on this machine, but a CUDA-enabled jaxlib is not installed. Falling back to cpu.\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"[[nan -0. -0. ... -0. -0. -0.]\n",
|
|
" [nan nan -0. ... -0. -0. -0.]\n",
|
|
" [nan nan nan ... -0. -0. -0.]\n",
|
|
" ...\n",
|
|
" [nan nan nan ... nan -0. -0.]\n",
|
|
" [nan nan nan ... nan nan -0.]\n",
|
|
" [nan nan nan ... nan nan nan]]\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"image/png": "",
|
|
"text/plain": [
|
|
"<Figure size 1200x600 with 1 Axes>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Demo: draw fBm terrain with gen_elevation_profile and plot each returned row.\n",
|
|
"key = random.PRNGKey(0)\n",
|
|
"steps = 1000\n",
|
|
"# NOTE(review): `samples` is defined but not passed to gen_elevation_profile below\n",
|
|
"samples = 5\n",
|
|
"\n",
|
|
"H = 0.6\n",
|
|
"\n",
|
|
"t, fbm = gen_elevation_profile(key, steps, H)\n",
|
|
"plt.figure(figsize=(12,6))\n",
|
|
"# prints the whole array; the stored output of this cell shows NaN entries\n",
|
|
"print(fbm)\n",
|
|
"# one line per row of fbm (labels are set but no legend is drawn)\n",
|
|
"for i in range(fbm.shape[0]):\n",
|
|
" plt.plot(t, fbm[i], label=f\"Sample {i}\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"[<matplotlib.lines.Line2D at 0x7101983fd730>]"
|
|
]
|
|
},
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
},
|
|
{
|
|
"data": {
|
|
"image/png": "",
|
|
"text/plain": [
|
|
"<Figure size 640x480 with 1 Axes>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"steps = 100\n",
|
|
"key = random.key(25)\n",
|
|
"\n",
|
|
"def uniform_window(n):\n",
|
|
"    \"\"\"Return a length-n box kernel in which every weight equals 1/n.\"\"\"\n",
|
|
"    weights = jnp.ones(n)\n",
|
|
"    return weights / n\n",
|
|
"\n",
|
|
"def generate_basic_terrain(key, steps=100, yscale=1.0, xscale=10.0, window=uniform_window, window_size=5):\n",
|
|
"    \"\"\"Generate a smoothed random-walk terrain profile.\n",
|
|
"\n",
|
|
"    Args:\n",
|
|
"        key: JAX PRNG key; it is split once and only the derived subkey is consumed.\n",
|
|
"        steps: number of terrain points.\n",
|
|
"        yscale: multiplier applied to the cumulative-sum random walk.\n",
|
|
"        xscale: spacing between consecutive x values.\n",
|
|
"        window: callable mapping a size to a 1-D smoothing kernel.\n",
|
|
"        window_size: size handed to `window`.\n",
|
|
"\n",
|
|
"    Returns:\n",
|
|
"        (x, y_smooth): x is arange(steps) * xscale, y_smooth the smoothed heights.\n",
|
|
"    \"\"\"\n",
|
|
"    # only the subkey is needed; the other half of the split was never used\n",
|
|
"    _, noise_key = random.split(key)\n",
|
|
"    # shape is documented as a tuple: `(steps)` is just a parenthesised int\n",
|
|
"    v = random.normal(noise_key, shape=(steps,))\n",
|
|
"    y = jnp.cumsum(v) * yscale\n",
|
|
"    # smooth with a windowing function\n",
|
|
"    y_smooth = jnp.convolve(y, window(window_size), mode='same')\n",
|
|
"    # compute the x-values\n",
|
|
"    x = jnp.arange(steps) * xscale\n",
|
|
"    return x, y_smooth\n",
|
|
"\n",
|
|
"\n",
|
|
"x,y = generate_basic_terrain(key)\n",
|
|
" \n",
|
|
"# slope angle in degrees: rise between neighbouring samples over a run of 10.0\n",
|
|
"# NOTE(review): 10.0 equals generate_basic_terrain's default xscale; keep the two in sync\n",
|
|
"# prepend=0 makes the first entry the rise from 0 up to y[0]\n",
|
|
"slope = jnp.atan(jnp.diff(y, prepend=0) / 10.0) * 180 / jnp.pi\n",
|
|
"plt.plot(x,y)\n",
|
|
"plt.plot(x, slope)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"[<matplotlib.lines.Line2D at 0x7101ac7f1070>]"
|
|
]
|
|
},
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
},
|
|
{
|
|
"data": {
|
|
"image/png": "",
|
|
"text/plain": [
|
|
"<Figure size 640x480 with 1 Axes>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"# we can compute the slope at any point along the terrain\n",
|
|
"plt.plot(x, slope)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"%run ../src/solarcarsim/physsim.py"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"CarParams(mass=800, frontal_area=1.3, drag_coeff=0.18, rolling_coeff=0.002, moter_eff=0.93, wheel_radius=0.23, max_speed=30.0, solar_area=5.0, solar_eff=0.2, n_motors=2, motor=MotorParams(kv=8.43, kt=1.1, resistance=100.0, friction_coeff=0.001, iron_coeff=0.001), battery=BatteryParams(shape=(36, 19), resistance=0.0126, initial_energy=66600.0))\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"from functools import partial\n",
|
|
"import jax\n",
|
|
"p = CarParams()\n",
|
|
"print(p)\n",
|
|
"\n",
|
|
"\n",
|
|
"def control_fn(time):\n",
|
|
" # linear ramp: 10 at time 0, rising to 20 at time 60 (averaging 15 over that span) and still rising after\n",
|
|
" # NOTE(review): the earlier note read \"for the first minute, go at 15 m/s\"; this is a ramp, not a constant 15 - confirm the intended profile\n",
|
|
" return 10 + time * 10/60\n",
|
|
"\n",
|
|
"def wrapper(curr_state, _):\n",
|
|
" \"\"\"Scan body: advance the state by one `forward` call.\n",
|
|
"\n",
|
|
" Returns the new state twice, once as the carry and once as the per-step output that scan stacks.\n",
|
|
" \"\"\"\n",
|
|
" # element 1 of the state is passed to control_fn as its `time` argument\n",
|
|
" vel = control_fn(curr_state[1])\n",
|
|
" # 0.1 is presumably the step size - TODO confirm against forward()\n",
|
|
" next_state = forward(curr_state, 0.1, vel, p)\n",
|
|
" return next_state, next_state\n",
|
|
"\n",
|
|
"# presumably [position, time, energy]: the plotting cell labels columns 0 and 2 that way and uses column 1 as the x axis - TODO confirm\n",
|
|
"state_init = jnp.array([0.0, 0.0, 45.5e6])\n",
|
|
"# run 1000 scan steps; `out` holds the state returned after every step\n",
|
|
"_, out = jax.lax.scan(wrapper, state_init, None, length=1000)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"[<matplotlib.lines.Line2D at 0x7101ac1667b0>]"
|
|
]
|
|
},
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
},
|
|
{
|
|
"data": {
|
|
"image/png": "",
|
|
"text/plain": [
|
|
"<Figure size 640x480 with 2 Axes>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"fig, ax1 = plt.subplots()\n",
|
|
"ax2 = ax1.twinx()\n",
|
|
"x = out[:,1]\n",
|
|
"ax1.plot(x, out[:,0], label=\"position\")\n",
|
|
"ax2.plot(x, out[:,2], label=\"energy\")\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"x = jnp.linspace(0,30, 1000)\n",
|
|
"dragf = drag_force(x, 1.3, 0.18, 1.184)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"[<matplotlib.lines.Line2D at 0x7101a4010b30>]"
|
|
]
|
|
},
|
|
"execution_count": 9,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
},
|
|
{
|
|
"data": {
|
|
"image/png": "",
|
|
"text/plain": [
|
|
"<Figure size 640x480 with 1 Axes>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"plt.plot(x, dragf)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"%run ../src/solarcarsim/noise.py"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"key = random.key(123)\n",
|
|
"fractal_tex = generate_noise_texture(key, 256, 256, \"fractal\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 12,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"<matplotlib.image.AxesImage at 0x7101a4373b30>"
|
|
]
|
|
},
|
|
"execution_count": 12,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
},
|
|
{
|
|
"data": {
|
|
"image/png": "",
|
|
"text/plain": [
|
|
"<Figure size 640x480 with 1 Axes>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"plt.imshow(fractal_tex)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 13,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pyvista as pv\n",
|
|
"import numpy as np\n",
|
|
"a = np.array(fractal_tex)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 14,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"/home/saji/Documents/Code/solarcarsim/.venv/lib/python3.12/site-packages/pyvista/plotting/texture.py:682: UserWarning: Expected `image` dtype to be ``np.uint8``. `image` has been copied and converted to np.uint8.\n",
|
|
" warnings.warn(\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"tex = pv.numpy_to_texture(a)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 15,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"/home/saji/Documents/Code/solarcarsim/.venv/lib/python3.12/site-packages/pyvista/core/utilities/points.py:55: UserWarning: Points is not a float type. This can cause issues when transforming or applying filters. Casting to ``np.float32``. Disable this by passing ``force_float=False``.\n",
|
|
" warnings.warn(\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"image/png": "",
|
|
"text/plain": [
|
|
"<Figure size 640x480 with 1 Axes>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"x = np.arange(0,256)\n",
|
|
"y = np.arange(0,256)\n",
|
|
"x, y = np.meshgrid(x, y)\n",
|
|
"fig, ax = plt.subplots(subplot_kw={\"projection\": \"3d\"})\n",
|
|
"ax.plot_surface(x,y,a)\n",
|
|
"grid=pv.StructuredGrid(x,y, a * 100)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 16,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"[<matplotlib.lines.Line2D at 0x7100e506cf80>]"
|
|
]
|
|
},
|
|
"execution_count": 16,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
},
|
|
{
|
|
"data": {
|
|
"image/png": "",
|
|
"text/plain": [
|
|
"<Figure size 640x480 with 1 Axes>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"key = random.key(123)\n",
|
|
"key, subkey = random.split(key)\n",
|
|
"y = generate_elevation_profile(subkey, 100, scale=100)\n",
|
|
"plt.plot(y)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 17,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"<matplotlib.image.AxesImage at 0x7100e4f72db0>"
|
|
]
|
|
},
|
|
"execution_count": 17,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
},
|
|
{
|
|
"data": {
|
|
"image/png": "",
|
|
"text/plain": [
|
|
"<Figure size 640x480 with 1 Axes>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"key, subkey = random.split(key)\n",
|
|
"y = generate_wind_field(subkey, 100, 100)\n",
|
|
"plt.imshow(y)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 13,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"image/png": "",
|
|
"text/plain": [
|
|
"<Figure size 1000x400 with 3 Axes>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"%run ../src/solarcarsim/physsim.py\n",
|
|
"from jax import random\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"plt.rcParams.update({\n",
|
|
" \"text.usetex\": True,\n",
|
|
"})\n",
|
|
"wind, elevation, slope = make_environment(random.key(123))\n",
|
|
"fig, (ax1, ax2) = plt.subplots(1,2, figsize=(10,4))\n",
|
|
"fig.set_tight_layout('auto')\n",
|
|
"fig.suptitle(\"Generated Environment\")\n",
|
|
"\n",
|
|
"ax1.imshow(wind, aspect='auto')\n",
|
|
"ax1.set_title(\"Wind Map\")\n",
|
|
"ax1.set_ylabel(\"Time (sec)\")\n",
|
|
"ax1.set_xlabel(\"Distance (m)\")\n",
|
|
"\n",
|
|
"\n",
|
|
"ax2.set_title(\"Terrain\")\n",
|
|
"ax_slope = ax2.twinx()\n",
|
|
"\n",
|
|
"ax2.plot(elevation, label=\"Elevation\")\n",
|
|
"ax2.set_ylabel(\"Elevation (m)\")\n",
|
|
"ax2.set_xlabel(\"Distance (m)\")\n",
|
|
"ax_slope.plot(slope, color='r', label='Slope')\n",
|
|
"ax_slope.set_ylabel(\"Slope (rad)\")\n",
|
|
"ax2.legend(loc=2)\n",
|
|
"ax_slope.legend(loc=1)\n",
|
|
"fig.savefig(\"environment.pdf\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 19,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"<matplotlib.image.AxesImage at 0x7100e4dcb050>"
|
|
]
|
|
},
|
|
"execution_count": 19,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
},
|
|
{
|
|
"data": {
|
|
"image/png": "",
|
|
"text/plain": [
|
|
"<Figure size 640x480 with 2 Axes>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"# testing the indexing into the wind array.\n",
|
|
"# NOTE(review): ax2 is only re-created further down, so this call acts on whatever an earlier cell left bound - looks like a leftover, confirm\n",
|
|
"ax2.legend()\n",
|
|
"# given an array of shape (10,2)\n",
|
|
"# return an array of (10,100,100)\n",
|
|
"# NOTE(review): key is not used in the rest of this cell\n",
|
|
"key = random.key(0)\n",
|
|
"@jit\n",
|
|
"def lookup(x):\n",
|
|
" \"\"\"Slice a (100, 100) window out of the global `wind` array, starting at index pair x.\"\"\"\n",
|
|
" return lax.dynamic_slice(wind, x, (100, 100))\n",
|
|
"vlookup = vmap(lookup)\n",
|
|
"# only two start pairs are passed here, so res stacks two windows\n",
|
|
"# lax.dynamic_slice clamps start indices so the window stays in bounds - presumably why 9999 is tried; confirm against wind's shape\n",
|
|
"res = vlookup(jnp.array([[10,20], [9999, 600]]))\n",
|
|
"\n",
|
|
"fig, (ax1, ax2) = plt.subplots(1,2)\n",
|
|
"ax1.imshow(res[0])\n",
|
|
"ax2.imshow(res[1])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"%run ../src/solarcarsim/simv1.py\n",
|
|
"import gymnasium as gym\n",
|
|
"from gymnasium.wrappers.jax_to_numpy import JaxToNumpy\n",
|
|
"from gymnasium.wrappers.vector import JaxToNumpy as VJaxToNumpy"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"/home/saji/Documents/Code/solarcarsim/.venv/lib/python3.12/site-packages/stable_baselines3/common/env_checker.py:271: UserWarning: Your observation wind has an unconventional shape (neither an image, nor a 1D vector). We recommend you to flatten the observation to have only a 1D vector or use a custom policy to properly process the data.\n",
|
|
" warnings.warn(\n",
|
|
"/home/saji/Documents/Code/solarcarsim/.venv/lib/python3.12/site-packages/gymnasium/utils/env_checker.py:384: UserWarning: \u001b[33mWARN: The environment (<JaxToNumpy<SolarRaceV1 instance>>) is different from the unwrapped version (<SolarRaceV1 instance>). This could effect the environment checker as the environment most likely has a wrapper applied to it. We recommend using the raw environment for `check_env` using `env.unwrapped`.\u001b[0m\n",
|
|
" logger.warn(\n",
|
|
"/home/saji/Documents/Code/solarcarsim/.venv/lib/python3.12/site-packages/gymnasium/utils/env_checker.py:434: UserWarning: \u001b[33mWARN: Not able to test alternative render modes due to the environment not having a spec. Try instantiating the environment through `gymnasium.make`\u001b[0m\n",
|
|
" logger.warn(\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"env = SolarRaceV1()\n",
|
|
"wrapped_env = JaxToNumpy(env)\n",
|
|
"env.reset()\n",
|
|
"from stable_baselines3.common.env_checker import check_env\n",
|
|
"from gymnasium.utils.env_checker import check_env as gym_check_env\n",
|
|
"check_env(wrapped_env)\n",
|
|
"gym_check_env(wrapped_env)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 25,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Using cuda device\n",
|
|
"Wrapping the env with a `Monitor` wrapper\n",
|
|
"Wrapping the env in a DummyVecEnv.\n",
|
|
"---------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.77e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 335 |\n",
|
|
"| iterations | 1 |\n",
|
|
"| time_elapsed | 6 |\n",
|
|
"| total_timesteps | 2048 |\n",
|
|
"---------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.72e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 313 |\n",
|
|
"| iterations | 2 |\n",
|
|
"| time_elapsed | 13 |\n",
|
|
"| total_timesteps | 4096 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.52e+20 |\n",
|
|
"| n_updates | 10 |\n",
|
|
"| policy_gradient_loss | 6.05e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.84e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"--------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.55e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 309 |\n",
|
|
"| iterations | 3 |\n",
|
|
"| time_elapsed | 19 |\n",
|
|
"| total_timesteps | 6144 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -2.0372681e-10 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.5e+20 |\n",
|
|
"| n_updates | 20 |\n",
|
|
"| policy_gradient_loss | -2.82e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.52e+20 |\n",
|
|
"--------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.65e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 309 |\n",
|
|
"| iterations | 4 |\n",
|
|
"| time_elapsed | 26 |\n",
|
|
"| total_timesteps | 8192 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.23e+20 |\n",
|
|
"| n_updates | 30 |\n",
|
|
"| policy_gradient_loss | -6.43e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.91e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.61e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 309 |\n",
|
|
"| iterations | 5 |\n",
|
|
"| time_elapsed | 33 |\n",
|
|
"| total_timesteps | 10240 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.09e+20 |\n",
|
|
"| n_updates | 40 |\n",
|
|
"| policy_gradient_loss | -1.82e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.55e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"--------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.64e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 309 |\n",
|
|
"| iterations | 6 |\n",
|
|
"| time_elapsed | 39 |\n",
|
|
"| total_timesteps | 12288 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -1.1641532e-10 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.86e+20 |\n",
|
|
"| n_updates | 50 |\n",
|
|
"| policy_gradient_loss | 7.23e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.96e+20 |\n",
|
|
"--------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.63e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 7 |\n",
|
|
"| time_elapsed | 46 |\n",
|
|
"| total_timesteps | 14336 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 5.820766e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.05e+20 |\n",
|
|
"| n_updates | 60 |\n",
|
|
"| policy_gradient_loss | -6.76e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.04e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.6e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 8 |\n",
|
|
"| time_elapsed | 53 |\n",
|
|
"| total_timesteps | 16384 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.27e+20 |\n",
|
|
"| n_updates | 70 |\n",
|
|
"| policy_gradient_loss | 6.42e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.66e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.65e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 9 |\n",
|
|
"| time_elapsed | 59 |\n",
|
|
"| total_timesteps | 18432 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.38e+20 |\n",
|
|
"| n_updates | 80 |\n",
|
|
"| policy_gradient_loss | -2.08e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.94e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.63e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 10 |\n",
|
|
"| time_elapsed | 66 |\n",
|
|
"| total_timesteps | 20480 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.77e+20 |\n",
|
|
"| n_updates | 90 |\n",
|
|
"| policy_gradient_loss | -4.44e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.87e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.64e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 11 |\n",
|
|
"| time_elapsed | 73 |\n",
|
|
"| total_timesteps | 22528 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.84e+20 |\n",
|
|
"| n_updates | 100 |\n",
|
|
"| policy_gradient_loss | -3.09e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.7e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.65e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 12 |\n",
|
|
"| time_elapsed | 79 |\n",
|
|
"| total_timesteps | 24576 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.47e+20 |\n",
|
|
"| n_updates | 110 |\n",
|
|
"| policy_gradient_loss | 5.43e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.17e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.63e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 13 |\n",
|
|
"| time_elapsed | 86 |\n",
|
|
"| total_timesteps | 26624 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.95e+20 |\n",
|
|
"| n_updates | 120 |\n",
|
|
"| policy_gradient_loss | -4.86e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.55e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.62e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 14 |\n",
|
|
"| time_elapsed | 92 |\n",
|
|
"| total_timesteps | 28672 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.6e+20 |\n",
|
|
"| n_updates | 130 |\n",
|
|
"| policy_gradient_loss | 4.9e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.01e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.62e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 15 |\n",
|
|
"| time_elapsed | 99 |\n",
|
|
"| total_timesteps | 30720 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.84e+20 |\n",
|
|
"| n_updates | 140 |\n",
|
|
"| policy_gradient_loss | -7.74e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.1e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.6e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 16 |\n",
|
|
"| time_elapsed | 106 |\n",
|
|
"| total_timesteps | 32768 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.22e+20 |\n",
|
|
"| n_updates | 150 |\n",
|
|
"| policy_gradient_loss | -2.27e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.73e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.6e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 17 |\n",
|
|
"| time_elapsed | 112 |\n",
|
|
"| total_timesteps | 34816 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.86e+20 |\n",
|
|
"| n_updates | 160 |\n",
|
|
"| policy_gradient_loss | -6.14e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.47e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.6e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 18 |\n",
|
|
"| time_elapsed | 119 |\n",
|
|
"| total_timesteps | 36864 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.05e+20 |\n",
|
|
"| n_updates | 170 |\n",
|
|
"| policy_gradient_loss | 2.55e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.59e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.6e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 19 |\n",
|
|
"| time_elapsed | 126 |\n",
|
|
"| total_timesteps | 38912 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.49e+20 |\n",
|
|
"| n_updates | 180 |\n",
|
|
"| policy_gradient_loss | 3.69e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.98e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.61e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 20 |\n",
|
|
"| time_elapsed | 132 |\n",
|
|
"| total_timesteps | 40960 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.71e+20 |\n",
|
|
"| n_updates | 190 |\n",
|
|
"| policy_gradient_loss | 1.04e-08 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.52e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.59e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 21 |\n",
|
|
"| time_elapsed | 139 |\n",
|
|
"| total_timesteps | 43008 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.89e+20 |\n",
|
|
"| n_updates | 200 |\n",
|
|
"| policy_gradient_loss | -1.36e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.76e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.59e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 22 |\n",
|
|
"| time_elapsed | 146 |\n",
|
|
"| total_timesteps | 45056 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.69e+20 |\n",
|
|
"| n_updates | 210 |\n",
|
|
"| policy_gradient_loss | -4.9e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.35e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.59e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 23 |\n",
|
|
"| time_elapsed | 153 |\n",
|
|
"| total_timesteps | 47104 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.12e+20 |\n",
|
|
"| n_updates | 220 |\n",
|
|
"| policy_gradient_loss | 2.67e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 8e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.59e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 24 |\n",
|
|
"| time_elapsed | 159 |\n",
|
|
"| total_timesteps | 49152 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.49e+20 |\n",
|
|
"| n_updates | 230 |\n",
|
|
"| policy_gradient_loss | 4.05e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.71e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.59e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 25 |\n",
|
|
"| time_elapsed | 166 |\n",
|
|
"| total_timesteps | 51200 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.78e+20 |\n",
|
|
"| n_updates | 240 |\n",
|
|
"| policy_gradient_loss | 8.82e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.44e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.59e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 26 |\n",
|
|
"| time_elapsed | 173 |\n",
|
|
"| total_timesteps | 53248 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.82e+20 |\n",
|
|
"| n_updates | 250 |\n",
|
|
"| policy_gradient_loss | -6.17e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.43e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.59e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 27 |\n",
|
|
"| time_elapsed | 179 |\n",
|
|
"| total_timesteps | 55296 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.37e+20 |\n",
|
|
"| n_updates | 260 |\n",
|
|
"| policy_gradient_loss | -6.08e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.17e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.59e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 28 |\n",
|
|
"| time_elapsed | 186 |\n",
|
|
"| total_timesteps | 57344 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.01e+20 |\n",
|
|
"| n_updates | 270 |\n",
|
|
"| policy_gradient_loss | -3.07e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 8.19e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.61e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 29 |\n",
|
|
"| time_elapsed | 192 |\n",
|
|
"| total_timesteps | 59392 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -5.820766e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.78e+20 |\n",
|
|
"| n_updates | 280 |\n",
|
|
"| policy_gradient_loss | -1.64e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.88e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.6e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 30 |\n",
|
|
"| time_elapsed | 199 |\n",
|
|
"| total_timesteps | 61440 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -5.820766e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.14e+20 |\n",
|
|
"| n_updates | 290 |\n",
|
|
"| policy_gradient_loss | 1.87e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 8.11e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.62e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 31 |\n",
|
|
"| time_elapsed | 206 |\n",
|
|
"| total_timesteps | 63488 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.92e+20 |\n",
|
|
"| n_updates | 300 |\n",
|
|
"| policy_gradient_loss | 2.99e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.51e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.64e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 32 |\n",
|
|
"| time_elapsed | 212 |\n",
|
|
"| total_timesteps | 65536 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.27e+20 |\n",
|
|
"| n_updates | 310 |\n",
|
|
"| policy_gradient_loss | 4.27e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.65e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.64e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 33 |\n",
|
|
"| time_elapsed | 219 |\n",
|
|
"| total_timesteps | 67584 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.35e+20 |\n",
|
|
"| n_updates | 320 |\n",
|
|
"| policy_gradient_loss | -7.1e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 8.61e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.63e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 34 |\n",
|
|
"| time_elapsed | 225 |\n",
|
|
"| total_timesteps | 69632 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 5.820766e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.07e+20 |\n",
|
|
"| n_updates | 330 |\n",
|
|
"| policy_gradient_loss | 4.04e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.28e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.63e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 35 |\n",
|
|
"| time_elapsed | 232 |\n",
|
|
"| total_timesteps | 71680 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -5.820766e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.25e+20 |\n",
|
|
"| n_updates | 340 |\n",
|
|
"| policy_gradient_loss | -1.33e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.93e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.62e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 36 |\n",
|
|
"| time_elapsed | 239 |\n",
|
|
"| total_timesteps | 73728 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.7e+20 |\n",
|
|
"| n_updates | 350 |\n",
|
|
"| policy_gradient_loss | 5.44e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.46e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.62e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 37 |\n",
|
|
"| time_elapsed | 245 |\n",
|
|
"| total_timesteps | 75776 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.51e+20 |\n",
|
|
"| n_updates | 360 |\n",
|
|
"| policy_gradient_loss | 7.37e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.79e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.63e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 38 |\n",
|
|
"| time_elapsed | 252 |\n",
|
|
"| total_timesteps | 77824 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.31e+20 |\n",
|
|
"| n_updates | 370 |\n",
|
|
"| policy_gradient_loss | 7.54e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.89e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.62e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 39 |\n",
|
|
"| time_elapsed | 258 |\n",
|
|
"| total_timesteps | 79872 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.61e+20 |\n",
|
|
"| n_updates | 380 |\n",
|
|
"| policy_gradient_loss | -1.65e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.02e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.63e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 40 |\n",
|
|
"| time_elapsed | 265 |\n",
|
|
"| total_timesteps | 81920 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.13e+20 |\n",
|
|
"| n_updates | 390 |\n",
|
|
"| policy_gradient_loss | -1.05e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.72e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.63e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 41 |\n",
|
|
"| time_elapsed | 271 |\n",
|
|
"| total_timesteps | 83968 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.44e+20 |\n",
|
|
"| n_updates | 400 |\n",
|
|
"| policy_gradient_loss | 9.9e-11 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.51e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.65e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 42 |\n",
|
|
"| time_elapsed | 278 |\n",
|
|
"| total_timesteps | 86016 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -5.820766e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.82e+20 |\n",
|
|
"| n_updates | 410 |\n",
|
|
"| policy_gradient_loss | 3.73e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.65e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.64e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 43 |\n",
|
|
"| time_elapsed | 285 |\n",
|
|
"| total_timesteps | 88064 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -5.820766e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.15e+20 |\n",
|
|
"| n_updates | 420 |\n",
|
|
"| policy_gradient_loss | 1.07e-08 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 8.22e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.62e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 309 |\n",
|
|
"| iterations | 44 |\n",
|
|
"| time_elapsed | 291 |\n",
|
|
"| total_timesteps | 90112 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.26e+20 |\n",
|
|
"| n_updates | 430 |\n",
|
|
"| policy_gradient_loss | -6.7e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.38e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.63e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 309 |\n",
|
|
"| iterations | 45 |\n",
|
|
"| time_elapsed | 298 |\n",
|
|
"| total_timesteps | 92160 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -5.820766e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.17e+20 |\n",
|
|
"| n_updates | 440 |\n",
|
|
"| policy_gradient_loss | -3.11e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.41e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.63e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 46 |\n",
|
|
"| time_elapsed | 304 |\n",
|
|
"| total_timesteps | 94208 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.36e+20 |\n",
|
|
"| n_updates | 450 |\n",
|
|
"| policy_gradient_loss | -1.6e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.61e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.64e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 47 |\n",
|
|
"| time_elapsed | 311 |\n",
|
|
"| total_timesteps | 96256 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.51e+20 |\n",
|
|
"| n_updates | 460 |\n",
|
|
"| policy_gradient_loss | -2.65e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.34e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.64e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 48 |\n",
|
|
"| time_elapsed | 318 |\n",
|
|
"| total_timesteps | 98304 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.17e+20 |\n",
|
|
"| n_updates | 470 |\n",
|
|
"| policy_gradient_loss | -2.4e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.91e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.63e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 49 |\n",
|
|
"| time_elapsed | 325 |\n",
|
|
"| total_timesteps | 100352 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.99e+20 |\n",
|
|
"| n_updates | 480 |\n",
|
|
"| policy_gradient_loss | -1.58e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.04e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.65e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 50 |\n",
|
|
"| time_elapsed | 331 |\n",
|
|
"| total_timesteps | 102400 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.47e+20 |\n",
|
|
"| n_updates | 490 |\n",
|
|
"| policy_gradient_loss | 1.78e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 8.01e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.65e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 51 |\n",
|
|
"| time_elapsed | 338 |\n",
|
|
"| total_timesteps | 104448 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.19e+20 |\n",
|
|
"| n_updates | 500 |\n",
|
|
"| policy_gradient_loss | -4e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.91e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.67e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 52 |\n",
|
|
"| time_elapsed | 345 |\n",
|
|
"| total_timesteps | 106496 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.02e+20 |\n",
|
|
"| n_updates | 510 |\n",
|
|
"| policy_gradient_loss | 7.63e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.35e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.65e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 53 |\n",
|
|
"| time_elapsed | 351 |\n",
|
|
"| total_timesteps | 108544 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.7e+20 |\n",
|
|
"| n_updates | 520 |\n",
|
|
"| policy_gradient_loss | -4.46e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.62e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.66e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 54 |\n",
|
|
"| time_elapsed | 358 |\n",
|
|
"| total_timesteps | 110592 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 1.7462298e-10 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.5e+20 |\n",
|
|
"| n_updates | 530 |\n",
|
|
"| policy_gradient_loss | 2.7e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.65e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.66e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 55 |\n",
|
|
"| time_elapsed | 365 |\n",
|
|
"| total_timesteps | 112640 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.3283064e-10 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.57e+20 |\n",
|
|
"| n_updates | 540 |\n",
|
|
"| policy_gradient_loss | -4.66e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 8.58e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.68e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 56 |\n",
|
|
"| time_elapsed | 371 |\n",
|
|
"| total_timesteps | 114688 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 1.7462298e-10 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.89e+20 |\n",
|
|
"| n_updates | 550 |\n",
|
|
"| policy_gradient_loss | -1.26e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.07e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.66e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 57 |\n",
|
|
"| time_elapsed | 378 |\n",
|
|
"| total_timesteps | 116736 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.73e+20 |\n",
|
|
"| n_updates | 560 |\n",
|
|
"| policy_gradient_loss | 3.36e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.93e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"--------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.65e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 58 |\n",
|
|
"| time_elapsed | 385 |\n",
|
|
"| total_timesteps | 118784 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -2.0372681e-10 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.49e+20 |\n",
|
|
"| n_updates | 570 |\n",
|
|
"| policy_gradient_loss | 3.67e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.24e+20 |\n",
|
|
"--------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.64e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 59 |\n",
|
|
"| time_elapsed | 391 |\n",
|
|
"| total_timesteps | 120832 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.87e+20 |\n",
|
|
"| n_updates | 580 |\n",
|
|
"| policy_gradient_loss | -2.44e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.7e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.64e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 60 |\n",
|
|
"| time_elapsed | 398 |\n",
|
|
"| total_timesteps | 122880 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.35e+20 |\n",
|
|
"| n_updates | 590 |\n",
|
|
"| policy_gradient_loss | 9.02e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.98e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.63e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 61 |\n",
|
|
"| time_elapsed | 404 |\n",
|
|
"| total_timesteps | 124928 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.22e+20 |\n",
|
|
"| n_updates | 600 |\n",
|
|
"| policy_gradient_loss | -1.74e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.01e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.62e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 62 |\n",
|
|
"| time_elapsed | 411 |\n",
|
|
"| total_timesteps | 126976 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 1.1641532e-10 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.65e+20 |\n",
|
|
"| n_updates | 610 |\n",
|
|
"| policy_gradient_loss | 4.34e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.57e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.62e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 63 |\n",
|
|
"| time_elapsed | 418 |\n",
|
|
"| total_timesteps | 129024 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.07e+20 |\n",
|
|
"| n_updates | 620 |\n",
|
|
"| policy_gradient_loss | 8.41e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.21e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.64e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 64 |\n",
|
|
"| time_elapsed | 424 |\n",
|
|
"| total_timesteps | 131072 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.94e+20 |\n",
|
|
"| n_updates | 630 |\n",
|
|
"| policy_gradient_loss | 4.04e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.44e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.65e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 65 |\n",
|
|
"| time_elapsed | 431 |\n",
|
|
"| total_timesteps | 133120 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.45e+20 |\n",
|
|
"| n_updates | 640 |\n",
|
|
"| policy_gradient_loss | -4.02e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 8.26e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"--------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.65e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 66 |\n",
|
|
"| time_elapsed | 437 |\n",
|
|
"| total_timesteps | 135168 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -1.4551915e-10 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.24e+20 |\n",
|
|
"| n_updates | 650 |\n",
|
|
"| policy_gradient_loss | -7.53e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.25e+20 |\n",
|
|
"--------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.63e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 67 |\n",
|
|
"| time_elapsed | 444 |\n",
|
|
"| total_timesteps | 137216 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.6193447e-10 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.49e+20 |\n",
|
|
"| n_updates | 660 |\n",
|
|
"| policy_gradient_loss | -9.75e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.22e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.63e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 68 |\n",
|
|
"| time_elapsed | 451 |\n",
|
|
"| total_timesteps | 139264 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 5.820766e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.36e+20 |\n",
|
|
"| n_updates | 670 |\n",
|
|
"| policy_gradient_loss | 4.05e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.64e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.62e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 69 |\n",
|
|
"| time_elapsed | 457 |\n",
|
|
"| total_timesteps | 141312 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.4e+20 |\n",
|
|
"| n_updates | 680 |\n",
|
|
"| policy_gradient_loss | 2.14e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.54e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.62e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 70 |\n",
|
|
"| time_elapsed | 464 |\n",
|
|
"| total_timesteps | 143360 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.52e+20 |\n",
|
|
"| n_updates | 690 |\n",
|
|
"| policy_gradient_loss | 4.44e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.43e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.61e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 71 |\n",
|
|
"| time_elapsed | 471 |\n",
|
|
"| total_timesteps | 145408 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.75e+20 |\n",
|
|
"| n_updates | 700 |\n",
|
|
"| policy_gradient_loss | 1.57e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.35e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.62e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 72 |\n",
|
|
"| time_elapsed | 478 |\n",
|
|
"| total_timesteps | 147456 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.54e+20 |\n",
|
|
"| n_updates | 710 |\n",
|
|
"| policy_gradient_loss | 3.18e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.9e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.63e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 73 |\n",
|
|
"| time_elapsed | 485 |\n",
|
|
"| total_timesteps | 149504 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.12e+20 |\n",
|
|
"| n_updates | 720 |\n",
|
|
"| policy_gradient_loss | -3.43e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.63e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 74 |\n",
|
|
"| time_elapsed | 491 |\n",
|
|
"| total_timesteps | 151552 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.43e+20 |\n",
|
|
"| n_updates | 730 |\n",
|
|
"| policy_gradient_loss | 3.68e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.32e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.63e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 75 |\n",
|
|
"| time_elapsed | 498 |\n",
|
|
"| total_timesteps | 153600 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.3e+20 |\n",
|
|
"| n_updates | 740 |\n",
|
|
"| policy_gradient_loss | 5.75e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.27e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.62e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 76 |\n",
|
|
"| time_elapsed | 505 |\n",
|
|
"| total_timesteps | 155648 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.25e+20 |\n",
|
|
"| n_updates | 750 |\n",
|
|
"| policy_gradient_loss | -7.98e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.22e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.64e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 77 |\n",
|
|
"| time_elapsed | 511 |\n",
|
|
"| total_timesteps | 157696 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.46e+20 |\n",
|
|
"| n_updates | 760 |\n",
|
|
"| policy_gradient_loss | -9.47e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 8.11e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.65e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 78 |\n",
|
|
"| time_elapsed | 518 |\n",
|
|
"| total_timesteps | 159744 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.8e+20 |\n",
|
|
"| n_updates | 770 |\n",
|
|
"| policy_gradient_loss | 7.7e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.45e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.64e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 79 |\n",
|
|
"| time_elapsed | 525 |\n",
|
|
"| total_timesteps | 161792 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.52e+20 |\n",
|
|
"| n_updates | 780 |\n",
|
|
"| policy_gradient_loss | -1.09e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 8.03e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.64e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 80 |\n",
|
|
"| time_elapsed | 531 |\n",
|
|
"| total_timesteps | 163840 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.44e+20 |\n",
|
|
"| n_updates | 790 |\n",
|
|
"| policy_gradient_loss | -1.34e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.34e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.64e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 81 |\n",
|
|
"| time_elapsed | 538 |\n",
|
|
"| total_timesteps | 165888 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.67e+20 |\n",
|
|
"| n_updates | 800 |\n",
|
|
"| policy_gradient_loss | -4.87e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.82e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.64e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 82 |\n",
|
|
"| time_elapsed | 545 |\n",
|
|
"| total_timesteps | 167936 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.25e+20 |\n",
|
|
"| n_updates | 810 |\n",
|
|
"| policy_gradient_loss | 6.9e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 8.02e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.64e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 83 |\n",
|
|
"| time_elapsed | 551 |\n",
|
|
"| total_timesteps | 169984 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.73e+20 |\n",
|
|
"| n_updates | 820 |\n",
|
|
"| policy_gradient_loss | 1.06e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.56e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.63e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 84 |\n",
|
|
"| time_elapsed | 558 |\n",
|
|
"| total_timesteps | 172032 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.39e+20 |\n",
|
|
"| n_updates | 830 |\n",
|
|
"| policy_gradient_loss | 6.23e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.77e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.63e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 85 |\n",
|
|
"| time_elapsed | 565 |\n",
|
|
"| total_timesteps | 174080 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.0372681e-10 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.28e+20 |\n",
|
|
"| n_updates | 840 |\n",
|
|
"| policy_gradient_loss | 2.09e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.28e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"--------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.63e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 86 |\n",
|
|
"| time_elapsed | 572 |\n",
|
|
"| total_timesteps | 176128 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -1.7462298e-10 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.39e+20 |\n",
|
|
"| n_updates | 850 |\n",
|
|
"| policy_gradient_loss | 3.62e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.58e+20 |\n",
|
|
"--------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.66e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 87 |\n",
|
|
"| time_elapsed | 578 |\n",
|
|
"| total_timesteps | 178176 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -2.38e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.77e+20 |\n",
|
|
"| n_updates | 860 |\n",
|
|
"| policy_gradient_loss | -6e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.84e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.66e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 88 |\n",
|
|
"| time_elapsed | 585 |\n",
|
|
"| total_timesteps | 180224 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.77e+20 |\n",
|
|
"| n_updates | 870 |\n",
|
|
"| policy_gradient_loss | -1.66e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.08e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.66e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 89 |\n",
|
|
"| time_elapsed | 592 |\n",
|
|
"| total_timesteps | 182272 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.75e+20 |\n",
|
|
"| n_updates | 880 |\n",
|
|
"| policy_gradient_loss | -5.66e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.9e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.66e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 90 |\n",
|
|
"| time_elapsed | 598 |\n",
|
|
"| total_timesteps | 184320 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.41e+20 |\n",
|
|
"| n_updates | 890 |\n",
|
|
"| policy_gradient_loss | 1.07e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.39e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.67e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 91 |\n",
|
|
"| time_elapsed | 605 |\n",
|
|
"| total_timesteps | 186368 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.59e+20 |\n",
|
|
"| n_updates | 900 |\n",
|
|
"| policy_gradient_loss | -5.2e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.35e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.66e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 92 |\n",
|
|
"| time_elapsed | 612 |\n",
|
|
"| total_timesteps | 188416 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 8.731149e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.91e+20 |\n",
|
|
"| n_updates | 910 |\n",
|
|
"| policy_gradient_loss | -1.26e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 8.09e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.65e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 93 |\n",
|
|
"| time_elapsed | 619 |\n",
|
|
"| total_timesteps | 190464 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.86e+20 |\n",
|
|
"| n_updates | 920 |\n",
|
|
"| policy_gradient_loss | -1.74e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.42e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.65e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 94 |\n",
|
|
"| time_elapsed | 626 |\n",
|
|
"| total_timesteps | 192512 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -5.820766e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.13e+20 |\n",
|
|
"| n_updates | 930 |\n",
|
|
"| policy_gradient_loss | 7.95e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 8.04e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.65e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 95 |\n",
|
|
"| time_elapsed | 633 |\n",
|
|
"| total_timesteps | 194560 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 1.4551915e-10 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.92e+20 |\n",
|
|
"| n_updates | 940 |\n",
|
|
"| policy_gradient_loss | 1.34e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.42e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.67e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 96 |\n",
|
|
"| time_elapsed | 639 |\n",
|
|
"| total_timesteps | 196608 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.69e+20 |\n",
|
|
"| n_updates | 950 |\n",
|
|
"| policy_gradient_loss | 1.96e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.04e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.67e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 97 |\n",
|
|
"| time_elapsed | 646 |\n",
|
|
"| total_timesteps | 198656 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.81e+20 |\n",
|
|
"| n_updates | 960 |\n",
|
|
"| policy_gradient_loss | 5.85e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 8.1e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.67e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 98 |\n",
|
|
"| time_elapsed | 653 |\n",
|
|
"| total_timesteps | 200704 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.93e+20 |\n",
|
|
"| n_updates | 970 |\n",
|
|
"| policy_gradient_loss | 4.95e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.2e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.68e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 99 |\n",
|
|
"| time_elapsed | 660 |\n",
|
|
"| total_timesteps | 202752 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.75e+20 |\n",
|
|
"| n_updates | 980 |\n",
|
|
"| policy_gradient_loss | -3.31e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 8.1e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.69e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 100 |\n",
|
|
"| time_elapsed | 666 |\n",
|
|
"| total_timesteps | 204800 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -5.820766e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.16e+20 |\n",
|
|
"| n_updates | 990 |\n",
|
|
"| policy_gradient_loss | -4.21e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.06e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.69e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 101 |\n",
|
|
"| time_elapsed | 673 |\n",
|
|
"| total_timesteps | 206848 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -8.731149e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.1e+20 |\n",
|
|
"| n_updates | 1000 |\n",
|
|
"| policy_gradient_loss | 3.49e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.6e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.7e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 102 |\n",
|
|
"| time_elapsed | 679 |\n",
|
|
"| total_timesteps | 208896 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.46e+20 |\n",
|
|
"| n_updates | 1010 |\n",
|
|
"| policy_gradient_loss | -4.23e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 8.04e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.7e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 103 |\n",
|
|
"| time_elapsed | 686 |\n",
|
|
"| total_timesteps | 210944 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.53e+20 |\n",
|
|
"| n_updates | 1020 |\n",
|
|
"| policy_gradient_loss | -2.27e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.27e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.69e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 104 |\n",
|
|
"| time_elapsed | 693 |\n",
|
|
"| total_timesteps | 212992 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.15e+20 |\n",
|
|
"| n_updates | 1030 |\n",
|
|
"| policy_gradient_loss | -6.14e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 8.04e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"--------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.7e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 105 |\n",
|
|
"| time_elapsed | 699 |\n",
|
|
"| total_timesteps | 215040 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -1.4551915e-10 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.98e+20 |\n",
|
|
"| n_updates | 1040 |\n",
|
|
"| policy_gradient_loss | 4.92e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.48e+20 |\n",
|
|
"--------------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.69e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 106 |\n",
|
|
"| time_elapsed | 706 |\n",
|
|
"| total_timesteps | 217088 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.4e+20 |\n",
|
|
"| n_updates | 1050 |\n",
|
|
"| policy_gradient_loss | 2.12e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.6e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.68e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 107 |\n",
|
|
"| time_elapsed | 713 |\n",
|
|
"| total_timesteps | 219136 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.93e+20 |\n",
|
|
"| n_updates | 1060 |\n",
|
|
"| policy_gradient_loss | 1.12e-08 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.37e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.68e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 108 |\n",
|
|
"| time_elapsed | 719 |\n",
|
|
"| total_timesteps | 221184 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.51e+20 |\n",
|
|
"| n_updates | 1070 |\n",
|
|
"| policy_gradient_loss | -7.58e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.43e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.69e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 109 |\n",
|
|
"| time_elapsed | 726 |\n",
|
|
"| total_timesteps | 223232 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -5.820766e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.42e+20 |\n",
|
|
"| n_updates | 1080 |\n",
|
|
"| policy_gradient_loss | -1.91e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 8.34e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.69e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 110 |\n",
|
|
"| time_elapsed | 733 |\n",
|
|
"| total_timesteps | 225280 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.01e+20 |\n",
|
|
"| n_updates | 1090 |\n",
|
|
"| policy_gradient_loss | -3.17e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.45e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.69e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 111 |\n",
|
|
"| time_elapsed | 739 |\n",
|
|
"| total_timesteps | 227328 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.57e+20 |\n",
|
|
"| n_updates | 1100 |\n",
|
|
"| policy_gradient_loss | -1.47e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.72e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.69e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 112 |\n",
|
|
"| time_elapsed | 746 |\n",
|
|
"| total_timesteps | 229376 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.44e+20 |\n",
|
|
"| n_updates | 1110 |\n",
|
|
"| policy_gradient_loss | -7.1e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.55e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.69e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 113 |\n",
|
|
"| time_elapsed | 753 |\n",
|
|
"| total_timesteps | 231424 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.91e+20 |\n",
|
|
"| n_updates | 1120 |\n",
|
|
"| policy_gradient_loss | 1.29e-08 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.22e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"--------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.68e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 114 |\n",
|
|
"| time_elapsed | 760 |\n",
|
|
"| total_timesteps | 233472 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -1.1641532e-10 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.18e+20 |\n",
|
|
"| n_updates | 1130 |\n",
|
|
"| policy_gradient_loss | -3.49e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.99e+20 |\n",
|
|
"--------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.69e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 115 |\n",
|
|
"| time_elapsed | 766 |\n",
|
|
"| total_timesteps | 235520 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.49e+20 |\n",
|
|
"| n_updates | 1140 |\n",
|
|
"| policy_gradient_loss | -5.62e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.98e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.68e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 116 |\n",
|
|
"| time_elapsed | 773 |\n",
|
|
"| total_timesteps | 237568 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -2.38e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.13e+20 |\n",
|
|
"| n_updates | 1150 |\n",
|
|
"| policy_gradient_loss | 2.58e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.67e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.67e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 117 |\n",
|
|
"| time_elapsed | 779 |\n",
|
|
"| total_timesteps | 239616 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.3283064e-10 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.24e+20 |\n",
|
|
"| n_updates | 1160 |\n",
|
|
"| policy_gradient_loss | -4.64e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.08e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.67e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 118 |\n",
|
|
"| time_elapsed | 786 |\n",
|
|
"| total_timesteps | 241664 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.8e+20 |\n",
|
|
"| n_updates | 1170 |\n",
|
|
"| policy_gradient_loss | -1.2e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.3e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.65e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 119 |\n",
|
|
"| time_elapsed | 792 |\n",
|
|
"| total_timesteps | 243712 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 1.1641532e-10 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.04e+20 |\n",
|
|
"| n_updates | 1180 |\n",
|
|
"| policy_gradient_loss | -3.09e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.8e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.66e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 120 |\n",
|
|
"| time_elapsed | 799 |\n",
|
|
"| total_timesteps | 245760 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.45e+20 |\n",
|
|
"| n_updates | 1190 |\n",
|
|
"| policy_gradient_loss | 4.03e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.61e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.66e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 121 |\n",
|
|
"| time_elapsed | 805 |\n",
|
|
"| total_timesteps | 247808 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -5.820766e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.63e+20 |\n",
|
|
"| n_updates | 1200 |\n",
|
|
"| policy_gradient_loss | -5.43e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 8.45e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.66e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 122 |\n",
|
|
"| time_elapsed | 811 |\n",
|
|
"| total_timesteps | 249856 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 5.820766e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.99e+20 |\n",
|
|
"| n_updates | 1210 |\n",
|
|
"| policy_gradient_loss | 1.86e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.59e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.65e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 123 |\n",
|
|
"| time_elapsed | 818 |\n",
|
|
"| total_timesteps | 251904 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.85e+20 |\n",
|
|
"| n_updates | 1220 |\n",
|
|
"| policy_gradient_loss | -1.14e-08 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.44e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.65e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 124 |\n",
|
|
"| time_elapsed | 824 |\n",
|
|
"| total_timesteps | 253952 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.3e+20 |\n",
|
|
"| n_updates | 1230 |\n",
|
|
"| policy_gradient_loss | 2.21e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.75e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.65e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 125 |\n",
|
|
"| time_elapsed | 830 |\n",
|
|
"| total_timesteps | 256000 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.37e+20 |\n",
|
|
"| n_updates | 1240 |\n",
|
|
"| policy_gradient_loss | -4.63e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.79e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.64e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 126 |\n",
|
|
"| time_elapsed | 836 |\n",
|
|
"| total_timesteps | 258048 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.11e+20 |\n",
|
|
"| n_updates | 1250 |\n",
|
|
"| policy_gradient_loss | -5.74e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.72e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.63e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 127 |\n",
|
|
"| time_elapsed | 843 |\n",
|
|
"| total_timesteps | 260096 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.79e+20 |\n",
|
|
"| n_updates | 1260 |\n",
|
|
"| policy_gradient_loss | 3.38e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.82e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.64e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 128 |\n",
|
|
"| time_elapsed | 849 |\n",
|
|
"| total_timesteps | 262144 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.91e+20 |\n",
|
|
"| n_updates | 1270 |\n",
|
|
"| policy_gradient_loss | 3.41e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.62e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.64e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 129 |\n",
|
|
"| time_elapsed | 856 |\n",
|
|
"| total_timesteps | 264192 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.33e+20 |\n",
|
|
"| n_updates | 1280 |\n",
|
|
"| policy_gradient_loss | 4.79e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 8.22e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.64e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 130 |\n",
|
|
"| time_elapsed | 862 |\n",
|
|
"| total_timesteps | 266240 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.22e+20 |\n",
|
|
"| n_updates | 1290 |\n",
|
|
"| policy_gradient_loss | -1.57e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.75e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.62e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 131 |\n",
|
|
"| time_elapsed | 868 |\n",
|
|
"| total_timesteps | 268288 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 1.4551915e-10 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.14e+20 |\n",
|
|
"| n_updates | 1300 |\n",
|
|
"| policy_gradient_loss | -3.55e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 8.36e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.62e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 132 |\n",
|
|
"| time_elapsed | 875 |\n",
|
|
"| total_timesteps | 270336 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -5.820766e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.27e+20 |\n",
|
|
"| n_updates | 1310 |\n",
|
|
"| policy_gradient_loss | 6.04e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.36e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.62e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 133 |\n",
|
|
"| time_elapsed | 881 |\n",
|
|
"| total_timesteps | 272384 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.09e+20 |\n",
|
|
"| n_updates | 1320 |\n",
|
|
"| policy_gradient_loss | -5.18e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.34e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.61e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 309 |\n",
|
|
"| iterations | 134 |\n",
|
|
"| time_elapsed | 887 |\n",
|
|
"| total_timesteps | 274432 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -5.820766e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.58e+20 |\n",
|
|
"| n_updates | 1330 |\n",
|
|
"| policy_gradient_loss | -1.22e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.27e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.61e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 309 |\n",
|
|
"| iterations | 135 |\n",
|
|
"| time_elapsed | 894 |\n",
|
|
"| total_timesteps | 276480 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.35e+20 |\n",
|
|
"| n_updates | 1340 |\n",
|
|
"| policy_gradient_loss | -2.39e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.82e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.61e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 309 |\n",
|
|
"| iterations | 136 |\n",
|
|
"| time_elapsed | 900 |\n",
|
|
"| total_timesteps | 278528 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.12e+20 |\n",
|
|
"| n_updates | 1350 |\n",
|
|
"| policy_gradient_loss | -2.61e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 8.26e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.59e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 309 |\n",
|
|
"| iterations | 137 |\n",
|
|
"| time_elapsed | 907 |\n",
|
|
"| total_timesteps | 280576 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.59e+20 |\n",
|
|
"| n_updates | 1360 |\n",
|
|
"| policy_gradient_loss | -4.31e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.52e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.59e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 309 |\n",
|
|
"| iterations | 138 |\n",
|
|
"| time_elapsed | 913 |\n",
|
|
"| total_timesteps | 282624 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.44e+20 |\n",
|
|
"| n_updates | 1370 |\n",
|
|
"| policy_gradient_loss | 1.35e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.26e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.58e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 309 |\n",
|
|
"| iterations | 139 |\n",
|
|
"| time_elapsed | 919 |\n",
|
|
"| total_timesteps | 284672 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.68e+20 |\n",
|
|
"| n_updates | 1380 |\n",
|
|
"| policy_gradient_loss | 4.9e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.15e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.59e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 309 |\n",
|
|
"| iterations | 140 |\n",
|
|
"| time_elapsed | 926 |\n",
|
|
"| total_timesteps | 286720 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.53e+20 |\n",
|
|
"| n_updates | 1390 |\n",
|
|
"| policy_gradient_loss | -3.98e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.89e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.59e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 309 |\n",
|
|
"| iterations | 141 |\n",
|
|
"| time_elapsed | 932 |\n",
|
|
"| total_timesteps | 288768 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 5.04e+20 |\n",
|
|
"| n_updates | 1400 |\n",
|
|
"| policy_gradient_loss | 3.41e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 8.59e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.57e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 309 |\n",
|
|
"| iterations | 142 |\n",
|
|
"| time_elapsed | 938 |\n",
|
|
"| total_timesteps | 290816 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.64e+20 |\n",
|
|
"| n_updates | 1410 |\n",
|
|
"| policy_gradient_loss | -2.99e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.82e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.57e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 309 |\n",
|
|
"| iterations | 143 |\n",
|
|
"| time_elapsed | 944 |\n",
|
|
"| total_timesteps | 292864 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.05e+20 |\n",
|
|
"| n_updates | 1420 |\n",
|
|
"| policy_gradient_loss | -1.03e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.54e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.58e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 310 |\n",
|
|
"| iterations | 144 |\n",
|
|
"| time_elapsed | 951 |\n",
|
|
"| total_timesteps | 294912 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 8.731149e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.29e+20 |\n",
|
|
"| n_updates | 1430 |\n",
|
|
"| policy_gradient_loss | -5.52e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.57e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.57e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 310 |\n",
|
|
"| iterations | 145 |\n",
|
|
"| time_elapsed | 957 |\n",
|
|
"| total_timesteps | 296960 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.79e+20 |\n",
|
|
"| n_updates | 1440 |\n",
|
|
"| policy_gradient_loss | 9.34e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.39e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.58e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 310 |\n",
|
|
"| iterations | 146 |\n",
|
|
"| time_elapsed | 963 |\n",
|
|
"| total_timesteps | 299008 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.6e+20 |\n",
|
|
"| n_updates | 1450 |\n",
|
|
"| policy_gradient_loss | -1.21e-08 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.85e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.57e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 310 |\n",
|
|
"| iterations | 147 |\n",
|
|
"| time_elapsed | 970 |\n",
|
|
"| total_timesteps | 301056 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.8e+20 |\n",
|
|
"| n_updates | 1460 |\n",
|
|
"| policy_gradient_loss | -2.56e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.97e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.59e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 310 |\n",
|
|
"| iterations | 148 |\n",
|
|
"| time_elapsed | 976 |\n",
|
|
"| total_timesteps | 303104 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.77e+20 |\n",
|
|
"| n_updates | 1470 |\n",
|
|
"| policy_gradient_loss | -5.72e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 8.11e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.6e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 310 |\n",
|
|
"| iterations | 149 |\n",
|
|
"| time_elapsed | 983 |\n",
|
|
"| total_timesteps | 305152 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -8.731149e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.35e+20 |\n",
|
|
"| n_updates | 1480 |\n",
|
|
"| policy_gradient_loss | -3.17e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.27e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.61e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 310 |\n",
|
|
"| iterations | 150 |\n",
|
|
"| time_elapsed | 989 |\n",
|
|
"| total_timesteps | 307200 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -8.731149e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.86e+20 |\n",
|
|
"| n_updates | 1490 |\n",
|
|
"| policy_gradient_loss | -1.05e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.64e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.61e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 310 |\n",
|
|
"| iterations | 151 |\n",
|
|
"| time_elapsed | 995 |\n",
|
|
"| total_timesteps | 309248 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 5.820766e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.36e+20 |\n",
|
|
"| n_updates | 1500 |\n",
|
|
"| policy_gradient_loss | -5.21e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 8.21e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.6e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 310 |\n",
|
|
"| iterations | 152 |\n",
|
|
"| time_elapsed | 1002 |\n",
|
|
"| total_timesteps | 311296 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 1.7462298e-10 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4e+20 |\n",
|
|
"| n_updates | 1510 |\n",
|
|
"| policy_gradient_loss | 2.01e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.84e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.6e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 310 |\n",
|
|
"| iterations | 153 |\n",
|
|
"| time_elapsed | 1008 |\n",
|
|
"| total_timesteps | 313344 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.96e+20 |\n",
|
|
"| n_updates | 1520 |\n",
|
|
"| policy_gradient_loss | 1.63e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.87e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.59e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 310 |\n",
|
|
"| iterations | 154 |\n",
|
|
"| time_elapsed | 1014 |\n",
|
|
"| total_timesteps | 315392 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -5.820766e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.64e+20 |\n",
|
|
"| n_updates | 1530 |\n",
|
|
"| policy_gradient_loss | -3.11e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.15e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.59e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 310 |\n",
|
|
"| iterations | 155 |\n",
|
|
"| time_elapsed | 1020 |\n",
|
|
"| total_timesteps | 317440 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 1.1641532e-10 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.85e+20 |\n",
|
|
"| n_updates | 1540 |\n",
|
|
"| policy_gradient_loss | -8.58e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.28e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.6e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 311 |\n",
|
|
"| iterations | 156 |\n",
|
|
"| time_elapsed | 1027 |\n",
|
|
"| total_timesteps | 319488 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.16e+20 |\n",
|
|
"| n_updates | 1550 |\n",
|
|
"| policy_gradient_loss | -2.01e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.75e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.58e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 311 |\n",
|
|
"| iterations | 157 |\n",
|
|
"| time_elapsed | 1033 |\n",
|
|
"| total_timesteps | 321536 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.25e+20 |\n",
|
|
"| n_updates | 1560 |\n",
|
|
"| policy_gradient_loss | -1.96e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.13e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.58e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 311 |\n",
|
|
"| iterations | 158 |\n",
|
|
"| time_elapsed | 1040 |\n",
|
|
"| total_timesteps | 323584 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -2.38e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.72e+20 |\n",
|
|
"| n_updates | 1570 |\n",
|
|
"| policy_gradient_loss | -2.49e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.97e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.58e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 311 |\n",
|
|
"| iterations | 159 |\n",
|
|
"| time_elapsed | 1046 |\n",
|
|
"| total_timesteps | 325632 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.67e+20 |\n",
|
|
"| n_updates | 1580 |\n",
|
|
"| policy_gradient_loss | -1.06e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.72e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.58e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 311 |\n",
|
|
"| iterations | 160 |\n",
|
|
"| time_elapsed | 1052 |\n",
|
|
"| total_timesteps | 327680 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.86e+20 |\n",
|
|
"| n_updates | 1590 |\n",
|
|
"| policy_gradient_loss | -3.42e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.59e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.59e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 311 |\n",
|
|
"| iterations | 161 |\n",
|
|
"| time_elapsed | 1058 |\n",
|
|
"| total_timesteps | 329728 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.92e+20 |\n",
|
|
"| n_updates | 1600 |\n",
|
|
"| policy_gradient_loss | 4.93e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.36e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.59e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 311 |\n",
|
|
"| iterations | 162 |\n",
|
|
"| time_elapsed | 1065 |\n",
|
|
"| total_timesteps | 331776 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.62e+20 |\n",
|
|
"| n_updates | 1610 |\n",
|
|
"| policy_gradient_loss | -2.09e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.25e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.6e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 311 |\n",
|
|
"| iterations | 163 |\n",
|
|
"| time_elapsed | 1072 |\n",
|
|
"| total_timesteps | 333824 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 1.1641532e-10 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.58e+20 |\n",
|
|
"| n_updates | 1620 |\n",
|
|
"| policy_gradient_loss | -5.06e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 8.1e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.61e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 311 |\n",
|
|
"| iterations | 164 |\n",
|
|
"| time_elapsed | 1078 |\n",
|
|
"| total_timesteps | 335872 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -8.731149e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.62e+20 |\n",
|
|
"| n_updates | 1630 |\n",
|
|
"| policy_gradient_loss | 4.85e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.93e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"--------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.61e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 311 |\n",
|
|
"| iterations | 165 |\n",
|
|
"| time_elapsed | 1084 |\n",
|
|
"| total_timesteps | 337920 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -1.7462298e-10 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.69e+20 |\n",
|
|
"| n_updates | 1640 |\n",
|
|
"| policy_gradient_loss | -3.42e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 8.06e+20 |\n",
|
|
"--------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.62e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 311 |\n",
|
|
"| iterations | 166 |\n",
|
|
"| time_elapsed | 1091 |\n",
|
|
"| total_timesteps | 339968 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.86e+20 |\n",
|
|
"| n_updates | 1650 |\n",
|
|
"| policy_gradient_loss | -1.39e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.25e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.62e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 311 |\n",
|
|
"| iterations | 167 |\n",
|
|
"| time_elapsed | 1097 |\n",
|
|
"| total_timesteps | 342016 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.36e+20 |\n",
|
|
"| n_updates | 1660 |\n",
|
|
"| policy_gradient_loss | 2.51e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.95e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.63e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 311 |\n",
|
|
"| iterations | 168 |\n",
|
|
"| time_elapsed | 1103 |\n",
|
|
"| total_timesteps | 344064 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.84e+20 |\n",
|
|
"| n_updates | 1670 |\n",
|
|
"| policy_gradient_loss | 1.31e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.74e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.64e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 311 |\n",
|
|
"| iterations | 169 |\n",
|
|
"| time_elapsed | 1110 |\n",
|
|
"| total_timesteps | 346112 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.85e+20 |\n",
|
|
"| n_updates | 1680 |\n",
|
|
"| policy_gradient_loss | 2.5e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.09e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.63e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 311 |\n",
|
|
"| iterations | 170 |\n",
|
|
"| time_elapsed | 1116 |\n",
|
|
"| total_timesteps | 348160 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.58e+20 |\n",
|
|
"| n_updates | 1690 |\n",
|
|
"| policy_gradient_loss | -1.57e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 8.12e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.65e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 311 |\n",
|
|
"| iterations | 171 |\n",
|
|
"| time_elapsed | 1123 |\n",
|
|
"| total_timesteps | 350208 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.88e+20 |\n",
|
|
"| n_updates | 1700 |\n",
|
|
"| policy_gradient_loss | -2.62e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.43e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.65e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 311 |\n",
|
|
"| iterations | 172 |\n",
|
|
"| time_elapsed | 1129 |\n",
|
|
"| total_timesteps | 352256 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.39e+20 |\n",
|
|
"| n_updates | 1710 |\n",
|
|
"| policy_gradient_loss | -5.16e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.11e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.65e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 311 |\n",
|
|
"| iterations | 173 |\n",
|
|
"| time_elapsed | 1136 |\n",
|
|
"| total_timesteps | 354304 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.92e+20 |\n",
|
|
"| n_updates | 1720 |\n",
|
|
"| policy_gradient_loss | 1.08e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.98e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.63e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 311 |\n",
|
|
"| iterations | 174 |\n",
|
|
"| time_elapsed | 1143 |\n",
|
|
"| total_timesteps | 356352 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.47e+20 |\n",
|
|
"| n_updates | 1730 |\n",
|
|
"| policy_gradient_loss | 3.02e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.93e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.65e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 311 |\n",
|
|
"| iterations | 175 |\n",
|
|
"| time_elapsed | 1149 |\n",
|
|
"| total_timesteps | 358400 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.88e+20 |\n",
|
|
"| n_updates | 1740 |\n",
|
|
"| policy_gradient_loss | 4.55e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.44e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.65e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 311 |\n",
|
|
"| iterations | 176 |\n",
|
|
"| time_elapsed | 1156 |\n",
|
|
"| total_timesteps | 360448 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.04e+20 |\n",
|
|
"| n_updates | 1750 |\n",
|
|
"| policy_gradient_loss | -1.49e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.06e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.65e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 311 |\n",
|
|
"| iterations | 177 |\n",
|
|
"| time_elapsed | 1163 |\n",
|
|
"| total_timesteps | 362496 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.63e+20 |\n",
|
|
"| n_updates | 1760 |\n",
|
|
"| policy_gradient_loss | 7.19e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.68e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"--------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.64e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 311 |\n",
|
|
"| iterations | 178 |\n",
|
|
"| time_elapsed | 1170 |\n",
|
|
"| total_timesteps | 364544 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -1.4551915e-10 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.73e+20 |\n",
|
|
"| n_updates | 1770 |\n",
|
|
"| policy_gradient_loss | -4.8e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 8.09e+20 |\n",
|
|
"--------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.63e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 311 |\n",
|
|
"| iterations | 179 |\n",
|
|
"| time_elapsed | 1176 |\n",
|
|
"| total_timesteps | 366592 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.99e+20 |\n",
|
|
"| n_updates | 1780 |\n",
|
|
"| policy_gradient_loss | -5.09e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.05e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.62e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 311 |\n",
|
|
"| iterations | 180 |\n",
|
|
"| time_elapsed | 1183 |\n",
|
|
"| total_timesteps | 368640 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.6e+20 |\n",
|
|
"| n_updates | 1790 |\n",
|
|
"| policy_gradient_loss | -4.17e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.91e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.63e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 311 |\n",
|
|
"| iterations | 181 |\n",
|
|
"| time_elapsed | 1190 |\n",
|
|
"| total_timesteps | 370688 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 5.820766e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.23e+20 |\n",
|
|
"| n_updates | 1800 |\n",
|
|
"| policy_gradient_loss | 3.55e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.3e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.63e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 311 |\n",
|
|
"| iterations | 182 |\n",
|
|
"| time_elapsed | 1197 |\n",
|
|
"| total_timesteps | 372736 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.88e+20 |\n",
|
|
"| n_updates | 1810 |\n",
|
|
"| policy_gradient_loss | 3.87e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.55e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.64e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 311 |\n",
|
|
"| iterations | 183 |\n",
|
|
"| time_elapsed | 1203 |\n",
|
|
"| total_timesteps | 374784 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -5.820766e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.95e+20 |\n",
|
|
"| n_updates | 1820 |\n",
|
|
"| policy_gradient_loss | -5.22e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.24e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.64e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 311 |\n",
|
|
"| iterations | 184 |\n",
|
|
"| time_elapsed | 1210 |\n",
|
|
"| total_timesteps | 376832 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.49e+20 |\n",
|
|
"| n_updates | 1830 |\n",
|
|
"| policy_gradient_loss | 3.16e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.13e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.64e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 311 |\n",
|
|
"| iterations | 185 |\n",
|
|
"| time_elapsed | 1217 |\n",
|
|
"| total_timesteps | 378880 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.05e+20 |\n",
|
|
"| n_updates | 1840 |\n",
|
|
"| policy_gradient_loss | 8.54e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 8.17e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.64e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 311 |\n",
|
|
"| iterations | 186 |\n",
|
|
"| time_elapsed | 1223 |\n",
|
|
"| total_timesteps | 380928 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.91e+20 |\n",
|
|
"| n_updates | 1850 |\n",
|
|
"| policy_gradient_loss | 4.47e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.75e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.66e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 311 |\n",
|
|
"| iterations | 187 |\n",
|
|
"| time_elapsed | 1230 |\n",
|
|
"| total_timesteps | 382976 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.65e+20 |\n",
|
|
"| n_updates | 1860 |\n",
|
|
"| policy_gradient_loss | 1.96e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.6e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.64e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 311 |\n",
|
|
"| iterations | 188 |\n",
|
|
"| time_elapsed | 1237 |\n",
|
|
"| total_timesteps | 385024 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.39e+20 |\n",
|
|
"| n_updates | 1870 |\n",
|
|
"| policy_gradient_loss | -4.21e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.77e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.65e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 310 |\n",
|
|
"| iterations | 189 |\n",
|
|
"| time_elapsed | 1244 |\n",
|
|
"| total_timesteps | 387072 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 1.1641532e-10 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.16e+20 |\n",
|
|
"| n_updates | 1880 |\n",
|
|
"| policy_gradient_loss | -2.6e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.65e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.65e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 310 |\n",
|
|
"| iterations | 190 |\n",
|
|
"| time_elapsed | 1251 |\n",
|
|
"| total_timesteps | 389120 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.93e+20 |\n",
|
|
"| n_updates | 1890 |\n",
|
|
"| policy_gradient_loss | 1.39e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 8.18e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.65e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 310 |\n",
|
|
"| iterations | 191 |\n",
|
|
"| time_elapsed | 1258 |\n",
|
|
"| total_timesteps | 391168 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.31e+20 |\n",
|
|
"| n_updates | 1900 |\n",
|
|
"| policy_gradient_loss | -1.09e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.92e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.64e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 310 |\n",
|
|
"| iterations | 192 |\n",
|
|
"| time_elapsed | 1265 |\n",
|
|
"| total_timesteps | 393216 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.42e+20 |\n",
|
|
"| n_updates | 1910 |\n",
|
|
"| policy_gradient_loss | 3.49e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.98e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.63e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 310 |\n",
|
|
"| iterations | 193 |\n",
|
|
"| time_elapsed | 1273 |\n",
|
|
"| total_timesteps | 395264 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -8.731149e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.98e+20 |\n",
|
|
"| n_updates | 1920 |\n",
|
|
"| policy_gradient_loss | -9.79e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.83e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.63e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 310 |\n",
|
|
"| iterations | 194 |\n",
|
|
"| time_elapsed | 1280 |\n",
|
|
"| total_timesteps | 397312 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.5e+20 |\n",
|
|
"| n_updates | 1930 |\n",
|
|
"| policy_gradient_loss | 1.64e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.09e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.62e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 310 |\n",
|
|
"| iterations | 195 |\n",
|
|
"| time_elapsed | 1286 |\n",
|
|
"| total_timesteps | 399360 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -8.731149e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.15e+20 |\n",
|
|
"| n_updates | 1940 |\n",
|
|
"| policy_gradient_loss | 1.91e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.95e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.62e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 310 |\n",
|
|
"| iterations | 196 |\n",
|
|
"| time_elapsed | 1293 |\n",
|
|
"| total_timesteps | 401408 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.39e+20 |\n",
|
|
"| n_updates | 1950 |\n",
|
|
"| policy_gradient_loss | -3.75e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.44e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.61e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 310 |\n",
|
|
"| iterations | 197 |\n",
|
|
"| time_elapsed | 1300 |\n",
|
|
"| total_timesteps | 403456 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.92e+20 |\n",
|
|
"| n_updates | 1960 |\n",
|
|
"| policy_gradient_loss | 2.71e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.42e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.62e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 310 |\n",
|
|
"| iterations | 198 |\n",
|
|
"| time_elapsed | 1307 |\n",
|
|
"| total_timesteps | 405504 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -2.38e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.41e+20 |\n",
|
|
"| n_updates | 1970 |\n",
|
|
"| policy_gradient_loss | 2.62e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.96e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.62e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 310 |\n",
|
|
"| iterations | 199 |\n",
|
|
"| time_elapsed | 1314 |\n",
|
|
"| total_timesteps | 407552 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.46e+20 |\n",
|
|
"| n_updates | 1980 |\n",
|
|
"| policy_gradient_loss | 6.41e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.64e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.62e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 310 |\n",
|
|
"| iterations | 200 |\n",
|
|
"| time_elapsed | 1321 |\n",
|
|
"| total_timesteps | 409600 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.18e+20 |\n",
|
|
"| n_updates | 1990 |\n",
|
|
"| policy_gradient_loss | -2.67e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 8.2e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.6e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 309 |\n",
|
|
"| iterations | 201 |\n",
|
|
"| time_elapsed | 1328 |\n",
|
|
"| total_timesteps | 411648 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.13e+20 |\n",
|
|
"| n_updates | 2000 |\n",
|
|
"| policy_gradient_loss | 7.92e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.34e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.61e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 309 |\n",
|
|
"| iterations | 202 |\n",
|
|
"| time_elapsed | 1335 |\n",
|
|
"| total_timesteps | 413696 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.53e+20 |\n",
|
|
"| n_updates | 2010 |\n",
|
|
"| policy_gradient_loss | -3.51e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.15e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.61e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 309 |\n",
|
|
"| iterations | 203 |\n",
|
|
"| time_elapsed | 1341 |\n",
|
|
"| total_timesteps | 415744 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.48e+20 |\n",
|
|
"| n_updates | 2020 |\n",
|
|
"| policy_gradient_loss | 1.69e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.11e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.61e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 309 |\n",
|
|
"| iterations | 204 |\n",
|
|
"| time_elapsed | 1348 |\n",
|
|
"| total_timesteps | 417792 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.07e+20 |\n",
|
|
"| n_updates | 2030 |\n",
|
|
"| policy_gradient_loss | -4.74e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.08e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.6e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 309 |\n",
|
|
"| iterations | 205 |\n",
|
|
"| time_elapsed | 1355 |\n",
|
|
"| total_timesteps | 419840 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 1.1641532e-10 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.17e+20 |\n",
|
|
"| n_updates | 2040 |\n",
|
|
"| policy_gradient_loss | -3.94e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.87e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.59e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 309 |\n",
|
|
"| iterations | 206 |\n",
|
|
"| time_elapsed | 1362 |\n",
|
|
"| total_timesteps | 421888 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.76e+20 |\n",
|
|
"| n_updates | 2050 |\n",
|
|
"| policy_gradient_loss | 3.55e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.46e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.57e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 309 |\n",
|
|
"| iterations | 207 |\n",
|
|
"| time_elapsed | 1369 |\n",
|
|
"| total_timesteps | 423936 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.56e+20 |\n",
|
|
"| n_updates | 2060 |\n",
|
|
"| policy_gradient_loss | -1.75e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 8.04e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.57e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 309 |\n",
|
|
"| iterations | 208 |\n",
|
|
"| time_elapsed | 1376 |\n",
|
|
"| total_timesteps | 425984 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.79e+20 |\n",
|
|
"| n_updates | 2070 |\n",
|
|
"| policy_gradient_loss | -1.17e-08 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.1e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.58e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 309 |\n",
|
|
"| iterations | 209 |\n",
|
|
"| time_elapsed | 1383 |\n",
|
|
"| total_timesteps | 428032 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.6193447e-10 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.41e+20 |\n",
|
|
"| n_updates | 2080 |\n",
|
|
"| policy_gradient_loss | 9.74e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.75e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.58e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 309 |\n",
|
|
"| iterations | 210 |\n",
|
|
"| time_elapsed | 1390 |\n",
|
|
"| total_timesteps | 430080 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 1.7462298e-10 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.14e+20 |\n",
|
|
"| n_updates | 2090 |\n",
|
|
"| policy_gradient_loss | -5.02e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.2e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.58e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 309 |\n",
|
|
"| iterations | 211 |\n",
|
|
"| time_elapsed | 1397 |\n",
|
|
"| total_timesteps | 432128 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 1.7462298e-10 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.71e+20 |\n",
|
|
"| n_updates | 2100 |\n",
|
|
"| policy_gradient_loss | 3.52e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.23e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.58e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 309 |\n",
|
|
"| iterations | 212 |\n",
|
|
"| time_elapsed | 1403 |\n",
|
|
"| total_timesteps | 434176 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.44e+20 |\n",
|
|
"| n_updates | 2110 |\n",
|
|
"| policy_gradient_loss | 2.95e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.7e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.57e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 309 |\n",
|
|
"| iterations | 213 |\n",
|
|
"| time_elapsed | 1410 |\n",
|
|
"| total_timesteps | 436224 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.98e+20 |\n",
|
|
"| n_updates | 2120 |\n",
|
|
"| policy_gradient_loss | -4.42e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.62e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.58e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 309 |\n",
|
|
"| iterations | 214 |\n",
|
|
"| time_elapsed | 1417 |\n",
|
|
"| total_timesteps | 438272 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.47e+20 |\n",
|
|
"| n_updates | 2130 |\n",
|
|
"| policy_gradient_loss | -1.04e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.43e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.59e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 309 |\n",
|
|
"| iterations | 215 |\n",
|
|
"| time_elapsed | 1424 |\n",
|
|
"| total_timesteps | 440320 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.59e+20 |\n",
|
|
"| n_updates | 2140 |\n",
|
|
"| policy_gradient_loss | -6.51e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.91e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"--------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.58e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 216 |\n",
|
|
"| time_elapsed | 1431 |\n",
|
|
"| total_timesteps | 442368 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -1.1641532e-10 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.57e+20 |\n",
|
|
"| n_updates | 2150 |\n",
|
|
"| policy_gradient_loss | -2.45e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.32e+20 |\n",
|
|
"--------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.58e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 217 |\n",
|
|
"| time_elapsed | 1438 |\n",
|
|
"| total_timesteps | 444416 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -8.731149e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.72e+20 |\n",
|
|
"| n_updates | 2160 |\n",
|
|
"| policy_gradient_loss | 8.11e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.65e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.56e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 218 |\n",
|
|
"| time_elapsed | 1445 |\n",
|
|
"| total_timesteps | 446464 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 5.820766e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.21e+20 |\n",
|
|
"| n_updates | 2170 |\n",
|
|
"| policy_gradient_loss | -4.02e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.93e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.55e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 219 |\n",
|
|
"| time_elapsed | 1452 |\n",
|
|
"| total_timesteps | 448512 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 5.96e-08 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.26e+20 |\n",
|
|
"| n_updates | 2180 |\n",
|
|
"| policy_gradient_loss | 2.76e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.84e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.55e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 220 |\n",
|
|
"| time_elapsed | 1460 |\n",
|
|
"| total_timesteps | 450560 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.45e+20 |\n",
|
|
"| n_updates | 2190 |\n",
|
|
"| policy_gradient_loss | 4.91e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.09e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.56e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 221 |\n",
|
|
"| time_elapsed | 1467 |\n",
|
|
"| total_timesteps | 452608 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.4e+20 |\n",
|
|
"| n_updates | 2200 |\n",
|
|
"| policy_gradient_loss | 2.73e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.29e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.57e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 222 |\n",
|
|
"| time_elapsed | 1474 |\n",
|
|
"| total_timesteps | 454656 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.69e+20 |\n",
|
|
"| n_updates | 2210 |\n",
|
|
"| policy_gradient_loss | 3.09e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 8.16e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.56e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 223 |\n",
|
|
"| time_elapsed | 1481 |\n",
|
|
"| total_timesteps | 456704 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.4e+20 |\n",
|
|
"| n_updates | 2220 |\n",
|
|
"| policy_gradient_loss | -2.49e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.83e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.58e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 224 |\n",
|
|
"| time_elapsed | 1487 |\n",
|
|
"| total_timesteps | 458752 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.23e+20 |\n",
|
|
"| n_updates | 2230 |\n",
|
|
"| policy_gradient_loss | 6.27e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.98e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.57e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 225 |\n",
|
|
"| time_elapsed | 1494 |\n",
|
|
"| total_timesteps | 460800 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.31e+20 |\n",
|
|
"| n_updates | 2240 |\n",
|
|
"| policy_gradient_loss | 1.68e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.97e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.57e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 226 |\n",
|
|
"| time_elapsed | 1501 |\n",
|
|
"| total_timesteps | 462848 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 5.820766e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.34e+20 |\n",
|
|
"| n_updates | 2250 |\n",
|
|
"| policy_gradient_loss | 1.34e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.23e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.56e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 227 |\n",
|
|
"| time_elapsed | 1508 |\n",
|
|
"| total_timesteps | 464896 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.18e+20 |\n",
|
|
"| n_updates | 2260 |\n",
|
|
"| policy_gradient_loss | -1.05e-08 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.11e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.55e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 228 |\n",
|
|
"| time_elapsed | 1514 |\n",
|
|
"| total_timesteps | 466944 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.67e+20 |\n",
|
|
"| n_updates | 2270 |\n",
|
|
"| policy_gradient_loss | 9.96e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.88e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.53e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 229 |\n",
|
|
"| time_elapsed | 1521 |\n",
|
|
"| total_timesteps | 468992 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.16e+20 |\n",
|
|
"| n_updates | 2280 |\n",
|
|
"| policy_gradient_loss | -1.16e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.4e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.54e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 230 |\n",
|
|
"| time_elapsed | 1528 |\n",
|
|
"| total_timesteps | 471040 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.63e+20 |\n",
|
|
"| n_updates | 2290 |\n",
|
|
"| policy_gradient_loss | 1.45e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.63e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.53e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 231 |\n",
|
|
"| time_elapsed | 1534 |\n",
|
|
"| total_timesteps | 473088 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.48e+20 |\n",
|
|
"| n_updates | 2300 |\n",
|
|
"| policy_gradient_loss | -3.34e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.29e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.53e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 232 |\n",
|
|
"| time_elapsed | 1541 |\n",
|
|
"| total_timesteps | 475136 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 8.731149e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.87e+20 |\n",
|
|
"| n_updates | 2310 |\n",
|
|
"| policy_gradient_loss | -4.51e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.1e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.53e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 233 |\n",
|
|
"| time_elapsed | 1548 |\n",
|
|
"| total_timesteps | 477184 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -5.820766e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.61e+20 |\n",
|
|
"| n_updates | 2320 |\n",
|
|
"| policy_gradient_loss | 2.87e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.51e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.52e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 234 |\n",
|
|
"| time_elapsed | 1554 |\n",
|
|
"| total_timesteps | 479232 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.23e+20 |\n",
|
|
"| n_updates | 2330 |\n",
|
|
"| policy_gradient_loss | -5.88e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 8.24e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.52e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 235 |\n",
|
|
"| time_elapsed | 1562 |\n",
|
|
"| total_timesteps | 481280 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.22e+20 |\n",
|
|
"| n_updates | 2340 |\n",
|
|
"| policy_gradient_loss | -2.38e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.19e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.51e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 236 |\n",
|
|
"| time_elapsed | 1568 |\n",
|
|
"| total_timesteps | 483328 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.3e+20 |\n",
|
|
"| n_updates | 2350 |\n",
|
|
"| policy_gradient_loss | -1.01e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.09e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.51e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 237 |\n",
|
|
"| time_elapsed | 1575 |\n",
|
|
"| total_timesteps | 485376 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.9e+20 |\n",
|
|
"| n_updates | 2360 |\n",
|
|
"| policy_gradient_loss | -2.64e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.85e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.52e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 238 |\n",
|
|
"| time_elapsed | 1582 |\n",
|
|
"| total_timesteps | 487424 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.95e+20 |\n",
|
|
"| n_updates | 2370 |\n",
|
|
"| policy_gradient_loss | -3.15e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.31e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.52e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 239 |\n",
|
|
"| time_elapsed | 1588 |\n",
|
|
"| total_timesteps | 489472 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -8.731149e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.58e+20 |\n",
|
|
"| n_updates | 2380 |\n",
|
|
"| policy_gradient_loss | 5.26e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 8.31e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"--------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.53e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 240 |\n",
|
|
"| time_elapsed | 1595 |\n",
|
|
"| total_timesteps | 491520 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -2.6193447e-10 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.16e+20 |\n",
|
|
"| n_updates | 2390 |\n",
|
|
"| policy_gradient_loss | -2.83e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.82e+20 |\n",
|
|
"--------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.53e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 241 |\n",
|
|
"| time_elapsed | 1602 |\n",
|
|
"| total_timesteps | 493568 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.39e+20 |\n",
|
|
"| n_updates | 2400 |\n",
|
|
"| policy_gradient_loss | 2.51e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.5e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.53e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 242 |\n",
|
|
"| time_elapsed | 1608 |\n",
|
|
"| total_timesteps | 495616 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -8.731149e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.06e+20 |\n",
|
|
"| n_updates | 2410 |\n",
|
|
"| policy_gradient_loss | -6.89e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.06e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.51e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 243 |\n",
|
|
"| time_elapsed | 1615 |\n",
|
|
"| total_timesteps | 497664 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.18e+20 |\n",
|
|
"| n_updates | 2420 |\n",
|
|
"| policy_gradient_loss | -3.41e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.76e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"--------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.49e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 244 |\n",
|
|
"| time_elapsed | 1622 |\n",
|
|
"| total_timesteps | 499712 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -1.1641532e-10 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.97e+20 |\n",
|
|
"| n_updates | 2430 |\n",
|
|
"| policy_gradient_loss | -7.16e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.5e+20 |\n",
|
|
"--------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.51e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 245 |\n",
|
|
"| time_elapsed | 1628 |\n",
|
|
"| total_timesteps | 501760 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -5.820766e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.48e+20 |\n",
|
|
"| n_updates | 2440 |\n",
|
|
"| policy_gradient_loss | -2.81e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.7e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.51e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 246 |\n",
|
|
"| time_elapsed | 1635 |\n",
|
|
"| total_timesteps | 503808 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 8.731149e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.45e+20 |\n",
|
|
"| n_updates | 2450 |\n",
|
|
"| policy_gradient_loss | 4.88e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.99e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.52e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 247 |\n",
|
|
"| time_elapsed | 1641 |\n",
|
|
"| total_timesteps | 505856 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4e+20 |\n",
|
|
"| n_updates | 2460 |\n",
|
|
"| policy_gradient_loss | -3.23e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.21e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.53e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 248 |\n",
|
|
"| time_elapsed | 1649 |\n",
|
|
"| total_timesteps | 507904 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.82e+20 |\n",
|
|
"| n_updates | 2470 |\n",
|
|
"| policy_gradient_loss | -6.64e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.33e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.52e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 249 |\n",
|
|
"| time_elapsed | 1655 |\n",
|
|
"| total_timesteps | 509952 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.87e+20 |\n",
|
|
"| n_updates | 2480 |\n",
|
|
"| policy_gradient_loss | -2e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.59e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.52e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 250 |\n",
|
|
"| time_elapsed | 1662 |\n",
|
|
"| total_timesteps | 512000 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.21e+20 |\n",
|
|
"| n_updates | 2490 |\n",
|
|
"| policy_gradient_loss | 1.76e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.45e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.51e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 251 |\n",
|
|
"| time_elapsed | 1669 |\n",
|
|
"| total_timesteps | 514048 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.16e+20 |\n",
|
|
"| n_updates | 2500 |\n",
|
|
"| policy_gradient_loss | 1.24e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.98e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.52e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 252 |\n",
|
|
"| time_elapsed | 1675 |\n",
|
|
"| total_timesteps | 516096 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.71e+20 |\n",
|
|
"| n_updates | 2510 |\n",
|
|
"| policy_gradient_loss | -3.89e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.88e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.52e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 253 |\n",
|
|
"| time_elapsed | 1682 |\n",
|
|
"| total_timesteps | 518144 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -5.820766e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.46e+20 |\n",
|
|
"| n_updates | 2520 |\n",
|
|
"| policy_gradient_loss | 6.97e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.13e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"--------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.52e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 254 |\n",
|
|
"| time_elapsed | 1689 |\n",
|
|
"| total_timesteps | 520192 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.25e+20 |\n",
|
|
"| n_updates | 2530 |\n",
|
|
"| policy_gradient_loss | -3.5e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 8.1e+20 |\n",
|
|
"--------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.51e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 255 |\n",
|
|
"| time_elapsed | 1695 |\n",
|
|
"| total_timesteps | 522240 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.07e+20 |\n",
|
|
"| n_updates | 2540 |\n",
|
|
"| policy_gradient_loss | -3.22e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.55e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.52e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 256 |\n",
|
|
"| time_elapsed | 1702 |\n",
|
|
"| total_timesteps | 524288 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.85e+20 |\n",
|
|
"| n_updates | 2550 |\n",
|
|
"| policy_gradient_loss | 5.49e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.33e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.53e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 257 |\n",
|
|
"| time_elapsed | 1708 |\n",
|
|
"| total_timesteps | 526336 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -2.38e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3e+20 |\n",
|
|
"| n_updates | 2560 |\n",
|
|
"| policy_gradient_loss | 3.43e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.02e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.52e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 258 |\n",
|
|
"| time_elapsed | 1715 |\n",
|
|
"| total_timesteps | 528384 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.28e+20 |\n",
|
|
"| n_updates | 2570 |\n",
|
|
"| policy_gradient_loss | -1.44e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.71e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.53e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 259 |\n",
|
|
"| time_elapsed | 1722 |\n",
|
|
"| total_timesteps | 530432 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.34e+20 |\n",
|
|
"| n_updates | 2580 |\n",
|
|
"| policy_gradient_loss | -4.22e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.16e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.53e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 260 |\n",
|
|
"| time_elapsed | 1728 |\n",
|
|
"| total_timesteps | 532480 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.15e+20 |\n",
|
|
"| n_updates | 2590 |\n",
|
|
"| policy_gradient_loss | 6.02e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.2e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.55e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 261 |\n",
|
|
"| time_elapsed | 1735 |\n",
|
|
"| total_timesteps | 534528 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -8.731149e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 5e+20 |\n",
|
|
"| n_updates | 2600 |\n",
|
|
"| policy_gradient_loss | 4.48e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 8.02e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.55e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 262 |\n",
|
|
"| time_elapsed | 1741 |\n",
|
|
"| total_timesteps | 536576 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 5.820766e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.15e+20 |\n",
|
|
"| n_updates | 2610 |\n",
|
|
"| policy_gradient_loss | -5.84e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.04e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.55e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 263 |\n",
|
|
"| time_elapsed | 1748 |\n",
|
|
"| total_timesteps | 538624 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.89e+20 |\n",
|
|
"| n_updates | 2620 |\n",
|
|
"| policy_gradient_loss | -3.63e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.62e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.57e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 264 |\n",
|
|
"| time_elapsed | 1754 |\n",
|
|
"| total_timesteps | 540672 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.73e+20 |\n",
|
|
"| n_updates | 2630 |\n",
|
|
"| policy_gradient_loss | -2.57e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.07e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.58e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 265 |\n",
|
|
"| time_elapsed | 1761 |\n",
|
|
"| total_timesteps | 542720 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.3e+20 |\n",
|
|
"| n_updates | 2640 |\n",
|
|
"| policy_gradient_loss | -5.97e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.42e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.59e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 266 |\n",
|
|
"| time_elapsed | 1768 |\n",
|
|
"| total_timesteps | 544768 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.58e+20 |\n",
|
|
"| n_updates | 2650 |\n",
|
|
"| policy_gradient_loss | -1.79e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 8.32e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.58e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 267 |\n",
|
|
"| time_elapsed | 1774 |\n",
|
|
"| total_timesteps | 546816 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.5e+20 |\n",
|
|
"| n_updates | 2660 |\n",
|
|
"| policy_gradient_loss | 6.34e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.9e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.58e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 268 |\n",
|
|
"| time_elapsed | 1781 |\n",
|
|
"| total_timesteps | 548864 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -2.38e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.39e+20 |\n",
|
|
"| n_updates | 2670 |\n",
|
|
"| policy_gradient_loss | 2.16e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.59e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.58e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 269 |\n",
|
|
"| time_elapsed | 1788 |\n",
|
|
"| total_timesteps | 550912 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.3e+20 |\n",
|
|
"| n_updates | 2680 |\n",
|
|
"| policy_gradient_loss | 6.27e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.17e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.57e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 270 |\n",
|
|
"| time_elapsed | 1795 |\n",
|
|
"| total_timesteps | 552960 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 5.820766e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.02e+20 |\n",
|
|
"| n_updates | 2690 |\n",
|
|
"| policy_gradient_loss | -1.96e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.08e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.58e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 271 |\n",
|
|
"| time_elapsed | 1801 |\n",
|
|
"| total_timesteps | 555008 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -5.820766e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.98e+20 |\n",
|
|
"| n_updates | 2700 |\n",
|
|
"| policy_gradient_loss | 4.61e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.76e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.59e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 272 |\n",
|
|
"| time_elapsed | 1808 |\n",
|
|
"| total_timesteps | 557056 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 2.9e+20 |\n",
|
|
"| n_updates | 2710 |\n",
|
|
"| policy_gradient_loss | -6.43e-10 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 6.67e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.59e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 273 |\n",
|
|
"| time_elapsed | 1815 |\n",
|
|
"| total_timesteps | 559104 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 2.910383e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | 0 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.34e+20 |\n",
|
|
"| n_updates | 2720 |\n",
|
|
"| policy_gradient_loss | 7.83e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.53e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.58e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 274 |\n",
|
|
"| time_elapsed | 1821 |\n",
|
|
"| total_timesteps | 561152 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 5.820766e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -2.38e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.21e+20 |\n",
|
|
"| n_updates | 2730 |\n",
|
|
"| policy_gradient_loss | -2.04e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.01e+20 |\n",
|
|
"------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.58e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 275 |\n",
|
|
"| time_elapsed | 1828 |\n",
|
|
"| total_timesteps | 563200 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.16e+20 |\n",
|
|
"| n_updates | 2740 |\n",
|
|
"| policy_gradient_loss | -8.44e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.38e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.59e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 308 |\n",
|
|
"| iterations | 276 |\n",
|
|
"| time_elapsed | 1835 |\n",
|
|
"| total_timesteps | 565248 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -2.38e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.1e+20 |\n",
|
|
"| n_updates | 2750 |\n",
|
|
"| policy_gradient_loss | 2.26e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.69e+20 |\n",
|
|
"---------------------------------------\n",
|
|
"-------------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.59e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 277 |\n",
|
|
"| time_elapsed | 1841 |\n",
|
|
"| total_timesteps | 567296 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | -5.820766e-11 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -1.19e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 4.02e+20 |\n",
|
|
"| n_updates | 2760 |\n",
|
|
"| policy_gradient_loss | -1.79e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.44e+20 |\n",
|
|
"-------------------------------------------\n",
|
|
"---------------------------------------\n",
|
|
"| rollout/ | |\n",
|
|
"| ep_len_mean | 601 |\n",
|
|
"| ep_rew_mean | 8.59e+11 |\n",
|
|
"| time/ | |\n",
|
|
"| fps | 307 |\n",
|
|
"| iterations | 278 |\n",
|
|
"| time_elapsed | 1848 |\n",
|
|
"| total_timesteps | 569344 |\n",
|
|
"| train/ | |\n",
|
|
"| approx_kl | 0.0 |\n",
|
|
"| clip_fraction | 0 |\n",
|
|
"| clip_range | 0.2 |\n",
|
|
"| entropy_loss | -1.42 |\n",
|
|
"| explained_variance | -2.38e-07 |\n",
|
|
"| learning_rate | 0.0003 |\n",
|
|
"| loss | 3.88e+20 |\n",
|
|
"| n_updates | 2770 |\n",
|
|
"| policy_gradient_loss | 3.25e-09 |\n",
|
|
"| std | 1 |\n",
|
|
"| value_loss | 7.91e+20 |\n",
|
|
"---------------------------------------\n"
|
|
]
|
|
},
|
|
{
|
|
"ename": "KeyboardInterrupt",
|
|
"evalue": "",
|
|
"output_type": "error",
|
|
"traceback": [
|
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
|
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
|
|
"Cell \u001b[0;32mIn[25], line 4\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mstable_baselines3\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m PPO\n\u001b[1;32m 3\u001b[0m model \u001b[38;5;241m=\u001b[39m PPO(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMultiInputPolicy\u001b[39m\u001b[38;5;124m\"\u001b[39m, wrapped_env, verbose\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[0;32m----> 4\u001b[0m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlearn\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtotal_timesteps\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1_000_000\u001b[39;49m\u001b[43m)\u001b[49m\n",
|
|
"File \u001b[0;32m~/Documents/Code/solarcarsim/.venv/lib/python3.12/site-packages/stable_baselines3/ppo/ppo.py:311\u001b[0m, in \u001b[0;36mPPO.learn\u001b[0;34m(self, total_timesteps, callback, log_interval, tb_log_name, reset_num_timesteps, progress_bar)\u001b[0m\n\u001b[1;32m 302\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mlearn\u001b[39m(\n\u001b[1;32m 303\u001b[0m \u001b[38;5;28mself\u001b[39m: SelfPPO,\n\u001b[1;32m 304\u001b[0m total_timesteps: \u001b[38;5;28mint\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 309\u001b[0m progress_bar: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[1;32m 310\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m SelfPPO:\n\u001b[0;32m--> 311\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlearn\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 312\u001b[0m \u001b[43m \u001b[49m\u001b[43mtotal_timesteps\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtotal_timesteps\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 313\u001b[0m \u001b[43m \u001b[49m\u001b[43mcallback\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcallback\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 314\u001b[0m \u001b[43m \u001b[49m\u001b[43mlog_interval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlog_interval\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 315\u001b[0m \u001b[43m \u001b[49m\u001b[43mtb_log_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtb_log_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 316\u001b[0m \u001b[43m \u001b[49m\u001b[43mreset_num_timesteps\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreset_num_timesteps\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 317\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mprogress_bar\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprogress_bar\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 318\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
|
|
"File \u001b[0;32m~/Documents/Code/solarcarsim/.venv/lib/python3.12/site-packages/stable_baselines3/common/on_policy_algorithm.py:336\u001b[0m, in \u001b[0;36mOnPolicyAlgorithm.learn\u001b[0;34m(self, total_timesteps, callback, log_interval, tb_log_name, reset_num_timesteps, progress_bar)\u001b[0m\n\u001b[1;32m 333\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mep_info_buffer \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 334\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_dump_logs(iteration)\n\u001b[0;32m--> 336\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 338\u001b[0m callback\u001b[38;5;241m.\u001b[39mon_training_end()\n\u001b[1;32m 340\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\n",
|
|
"File \u001b[0;32m~/Documents/Code/solarcarsim/.venv/lib/python3.12/site-packages/stable_baselines3/ppo/ppo.py:213\u001b[0m, in \u001b[0;36mPPO.train\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 209\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maction_space, spaces\u001b[38;5;241m.\u001b[39mDiscrete):\n\u001b[1;32m 210\u001b[0m \u001b[38;5;66;03m# Convert discrete action from float to long\u001b[39;00m\n\u001b[1;32m 211\u001b[0m actions \u001b[38;5;241m=\u001b[39m rollout_data\u001b[38;5;241m.\u001b[39mactions\u001b[38;5;241m.\u001b[39mlong()\u001b[38;5;241m.\u001b[39mflatten()\n\u001b[0;32m--> 213\u001b[0m values, log_prob, entropy \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpolicy\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mevaluate_actions\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrollout_data\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mobservations\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mactions\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 214\u001b[0m values \u001b[38;5;241m=\u001b[39m values\u001b[38;5;241m.\u001b[39mflatten()\n\u001b[1;32m 215\u001b[0m \u001b[38;5;66;03m# Normalize advantage\u001b[39;00m\n",
|
|
"File \u001b[0;32m~/Documents/Code/solarcarsim/.venv/lib/python3.12/site-packages/stable_baselines3/common/policies.py:739\u001b[0m, in \u001b[0;36mActorCriticPolicy.evaluate_actions\u001b[0;34m(self, obs, actions)\u001b[0m\n\u001b[1;32m 737\u001b[0m distribution \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_action_dist_from_latent(latent_pi)\n\u001b[1;32m 738\u001b[0m log_prob \u001b[38;5;241m=\u001b[39m distribution\u001b[38;5;241m.\u001b[39mlog_prob(actions)\n\u001b[0;32m--> 739\u001b[0m values \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalue_net\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlatent_vf\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 740\u001b[0m entropy \u001b[38;5;241m=\u001b[39m distribution\u001b[38;5;241m.\u001b[39mentropy()\n\u001b[1;32m 741\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m values, log_prob, entropy\n",
|
|
"File \u001b[0;32m~/Documents/Code/solarcarsim/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py:1736\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1734\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1735\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1736\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
|
|
"File \u001b[0;32m~/Documents/Code/solarcarsim/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py:1747\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1742\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1743\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1744\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1745\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1746\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1747\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1749\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1750\u001b[0m called_always_called_hooks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()\n",
|
|
"File \u001b[0;32m~/Documents/Code/solarcarsim/.venv/lib/python3.12/site-packages/torch/nn/modules/linear.py:125\u001b[0m, in \u001b[0;36mLinear.forward\u001b[0;34m(self, input)\u001b[0m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;28minput\u001b[39m: Tensor) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Tensor:\n\u001b[0;32m--> 125\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mF\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlinear\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mweight\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbias\u001b[49m\u001b[43m)\u001b[49m\n",
|
|
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Import PPO from Stable-Baselines3, build a fresh agent on wrapped_env, and train it for 1M timesteps.\n",
|
|
"from stable_baselines3 import PPO\n",
|
|
"model = PPO(\"MultiInputPolicy\", wrapped_env, verbose=1)\n",
|
|
"model.learn(total_timesteps=1_000_000)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"Array([[[-4.9999666, -5.031969 , -5.063982 , ..., -6.714849 ,\n",
|
|
" -6.71299 , -6.7111654],\n",
|
|
" [-4.9998884, -5.031891 , -5.0639033, ..., -6.705859 ,\n",
|
|
" -6.7038655, -6.701909 ],\n",
|
|
" [-4.9997377, -5.03174 , -5.0637527, ..., -6.6968226,\n",
|
|
" -6.694696 , -6.692607 ],\n",
|
|
" ...,\n",
|
|
" [-4.8104963, -4.840162 , -4.869858 , ..., -6.505874 ,\n",
|
|
" -6.499703 , -6.4934487],\n",
|
|
" [-4.8117733, -4.8413825, -4.871023 , ..., -6.511339 ,\n",
|
|
" -6.5052385, -6.499054 ],\n",
|
|
" [-4.812991 , -4.8425455, -4.8721304, ..., -6.5165534,\n",
|
|
" -6.510523 , -6.504408 ]]], dtype=float32)"
|
|
]
|
|
},
|
|
"execution_count": 41,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"pos = jnp.array([0])\n",
|
|
"time = jnp.array([0])\n",
|
|
"x = jnp.stack([pos,time], axis=1)\n",
|
|
"vlookup(x)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": ".venv",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.12.7"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|