simv2

2024-12-15 13:54:23 -06:00 · 2024-12-15 13:54:23 -06:00 · 70a659f468
parent aaac5df404
commit 70a659f468
10 changed files with 835 additions and 164 deletions
--- a/README.md
+++ b/README.md
@ -1 +1,11 @@
 # solarcarsim
+
+
+
+TODO:
+fix wind (velocity + wind for drag)
+make more functional
+cleanup sim code
+parameterize the environment
+vectorize
+cleanrl jax td3
--- a/notebooks/testing.ipynb
+++ b/notebooks/testing.ipynb
@ -0,0 +1,65 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Array([ 0.0000000e+00, -2.5544850e-07, -1.4012958e-06, ...,\n",
+       "       -1.1142221e-02, -1.1067827e-02, -1.1001030e-02], dtype=float32)"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import jax\n",
+    "import jax.numpy as jnp\n",
+    "from solarcarsim.physsim import CarParams, fractal_noise_1d\n",
+    "\n",
+    "\n",
+    "key = jax.random.key(0)\n",
+    "\n",
+    "slope = fractal_noise_1d(key, 10000, scale=1200, height_scale=0.08)\n",
+    "\n",
+    "slope"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# get an array of positions\n",
+    "positions = jnp.array([1.1,2.2,3.3,5,200.0], dtype=jnp.float32)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/notebooks/v1gym.ipynb
+++ b/notebooks/v1gym.ipynb
--- a/notebooks/v1sim.ipynb
+++ b/notebooks/v1sim.ipynb
@ -564,7 +564,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
@ -576,7 +576,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
--- a/pdm.lock
+++ b/pdm.lock
@ -5,7 +5,7 @@
 groups = ["default", "dev"]
 strategy = ["inherit_metadata"]
 lock_version = "4.5.0"
-content_hash = "sha256:81e26f71acf1a583b21280b235fa2ac16165ac824ae8483bd391b88406421aa4"
+content_hash = "sha256:a3b65f863c554725c33d452fd759776141740661fa3555d306ed08563a7e16e2"

 [[metadata.targets]]
 requires_python = ">=3.12,<3.13"
@ -152,7 +152,7 @@ version = "0.4.6"
 requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
 summary = "Cross-platform colored terminal text."
 groups = ["default", "dev"]
-marker = "python_version >= \"3.12\" and python_version < \"3.13\" and sys_platform == \"win32\""
+marker = "sys_platform == \"win32\" and python_version >= \"3.12\" and python_version < \"3.13\" or platform_system == \"Windows\" and python_version >= \"3.12\" and python_version < \"3.13\""
 files = [
    {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
    {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
@ -230,13 +230,30 @@ name = "decorator"
 version = "5.1.1"
 requires_python = ">=3.5"
 summary = "Decorators for Humans"
-groups = ["dev"]
+groups = ["default", "dev"]
 marker = "python_version >= \"3.12\" and python_version < \"3.13\""
 files = [
    {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"},
    {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"},
 ]

+[[package]]
+name = "dm-tree"
+version = "0.1.8"
+summary = "Tree is a library for working with nested data structures."
+groups = ["default"]
+marker = "python_version >= \"3.12\" and python_version < \"3.13\""
+files = [
+    {file = "dm-tree-0.1.8.tar.gz", hash = "sha256:0fcaabbb14e7980377439e7140bd05552739ca5e515ecb3119f234acee4b9430"},
+    {file = "dm_tree-0.1.8-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:ea9e59e0451e7d29aece402d9f908f2e2a80922bcde2ebfd5dcb07750fcbfee8"},
+    {file = "dm_tree-0.1.8-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:94d3f0826311f45ee19b75f5b48c99466e4218a0489e81c0f0167bda50cacf22"},
+    {file = "dm_tree-0.1.8-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:435227cf3c5dc63f4de054cf3d00183790bd9ead4c3623138c74dde7f67f521b"},
+    {file = "dm_tree-0.1.8-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09964470f76a5201aff2e8f9b26842976de7889300676f927930f6285e256760"},
+    {file = "dm_tree-0.1.8-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:75c5d528bb992981c20793b6b453e91560784215dffb8a5440ba999753c14ceb"},
+    {file = "dm_tree-0.1.8-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0a94aba18a35457a1b5cd716fd7b46c5dafdc4cf7869b4bae665b91c4682a8e"},
+    {file = "dm_tree-0.1.8-cp312-cp312-win_amd64.whl", hash = "sha256:96a548a406a6fb15fe58f6a30a57ff2f2aafbf25f05afab00c8f5e5977b6c715"},
+]
+
 [[package]]
 name = "etils"
 version = "1.11.0"
@ -379,6 +396,47 @@ files = [
    {file = "fsspec-2024.10.0.tar.gz", hash = "sha256:eda2d8a4116d4f2429db8550f2457da57279247dd930bb12f821b58391359493"},
 ]

+[[package]]
+name = "gast"
+version = "0.6.0"
+requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7"
+summary = "Python AST that abstracts the underlying Python version"
+groups = ["default"]
+marker = "python_version >= \"3.12\" and python_version < \"3.13\""
+files = [
+    {file = "gast-0.6.0-py3-none-any.whl", hash = "sha256:52b182313f7330389f72b069ba00f174cfe2a06411099547288839c6cbafbd54"},
+    {file = "gast-0.6.0.tar.gz", hash = "sha256:88fc5300d32c7ac6ca7b515310862f71e6fdf2c029bbec7c66c0f5dd47b6b1fb"},
+]
+
+[[package]]
+name = "gym"
+version = "0.26.2"
+requires_python = ">=3.6"
+summary = "Gym: A universal API for reinforcement learning environments"
+groups = ["default"]
+marker = "python_version >= \"3.12\" and python_version < \"3.13\""
+dependencies = [
+    "cloudpickle>=1.2.0",
+    "dataclasses==0.8; python_version == \"3.6\"",
+    "gym-notices>=0.0.4",
+    "importlib-metadata>=4.8.0; python_version < \"3.10\"",
+    "numpy>=1.18.0",
+]
+files = [
+    {file = "gym-0.26.2.tar.gz", hash = "sha256:e0d882f4b54f0c65f203104c24ab8a38b039f1289986803c7d02cdbe214fbcc4"},
+]
+
+[[package]]
+name = "gym-notices"
+version = "0.0.8"
+summary = "Notices for gym"
+groups = ["default"]
+marker = "python_version >= \"3.12\" and python_version < \"3.13\""
+files = [
+    {file = "gym-notices-0.0.8.tar.gz", hash = "sha256:ad25e200487cafa369728625fe064e88ada1346618526102659b4640f2b4b911"},
+    {file = "gym_notices-0.0.8-py3-none-any.whl", hash = "sha256:e5f82e00823a166747b4c2a07de63b6560b1acb880638547e0cabf825a01e463"},
+]
+
 [[package]]
 name = "gymnasium"
 version = "1.0.0"
@ -417,6 +475,29 @@ files = [
    {file = "gymnasium-1.0.0.tar.gz", hash = "sha256:9d2b66f30c1b34fe3c2ce7fae65ecf365d0e9982d2b3d860235e773328a3b403"},
 ]

+[[package]]
+name = "gymnax"
+version = "0.0.8"
+requires_python = ">=3.10"
+summary = "JAX-compatible version of Open AI's gym environments"
+groups = ["default"]
+marker = "python_version >= \"3.12\" and python_version < \"3.13\""
+dependencies = [
+    "chex",
+    "flax",
+    "gym>=0.26",
+    "gymnasium",
+    "jax",
+    "jaxlib",
+    "matplotlib",
+    "pyyaml",
+    "seaborn",
+]
+files = [
+    {file = "gymnax-0.0.8-py3-none-any.whl", hash = "sha256:0af7edde1b71d74be8007ffe1e6338f8ce66693b1b78ae479c0c0cd02b10de03"},
+    {file = "gymnax-0.0.8.tar.gz", hash = "sha256:81defc17f52a30a84338b3daa574d7a3bb112f2656f45c783a71efe31eea68ff"},
+]
+
 [[package]]
 name = "humanize"
 version = "4.11.0"
@ -541,76 +622,6 @@ files = [
    {file = "jax-0.4.37.tar.gz", hash = "sha256:7774f3d9e23fe199c65589c680c5a5be87a183b89598421a632d8245222b637b"},
 ]

-[[package]]
-name = "jax-cuda12-pjrt"
-version = "0.4.36"
-summary = "JAX XLA PJRT Plugin for NVIDIA GPUs"
-groups = ["default"]
-marker = "python_version >= \"3.12\" and python_version < \"3.13\""
-files = [
-    {file = "jax_cuda12_pjrt-0.4.36-py3-none-manylinux2014_aarch64.whl", hash = "sha256:1dfc0bec0820ba801b61e9421064b6e58238c430b4ad8f54043323d93c0217c6"},
-    {file = "jax_cuda12_pjrt-0.4.36-py3-none-manylinux2014_x86_64.whl", hash = "sha256:e3c3705d8db7d63da9abfaebf06f5cd0667f5acb0748a5c5eb00d80041e922ed"},
-]
-
-[[package]]
-name = "jax-cuda12-plugin"
-version = "0.4.36"
-requires_python = ">=3.10"
-summary = "JAX Plugin for NVIDIA GPUs"
-groups = ["default"]
-marker = "python_version >= \"3.12\" and python_version < \"3.13\""
-dependencies = [
-    "jax-cuda12-pjrt==0.4.36",
-]
-files = [
-    {file = "jax_cuda12_plugin-0.4.36-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:6a0b0c2bdc1da2eea2c20723a1e8f39b3cda67d24c665de936647e8091f5790d"},
-    {file = "jax_cuda12_plugin-0.4.36-cp312-cp312-manylinux2014_x86_64.whl", hash = "sha256:5d4727fb519fedc06a9a984d5a0714804d81ef126a2cb60cefd5cbc4a3ea2627"},
-]
-
-[[package]]
-name = "jax-cuda12-plugin"
-version = "0.4.36"
-extras = ["with_cuda"]
-requires_python = ">=3.10"
-summary = "JAX Plugin for NVIDIA GPUs"
-groups = ["default"]
-marker = "python_version >= \"3.12\" and python_version < \"3.13\""
-dependencies = [
-    "jax-cuda12-plugin==0.4.36",
-    "nvidia-cublas-cu12>=12.1.3.1",
-    "nvidia-cuda-cupti-cu12>=12.1.105",
-    "nvidia-cuda-nvcc-cu12>=12.6.85",
-    "nvidia-cuda-runtime-cu12>=12.1.105",
-    "nvidia-cudnn-cu12<10.0,>=9.1",
-    "nvidia-cufft-cu12>=11.0.2.54",
-    "nvidia-cusolver-cu12>=11.4.5.107",
-    "nvidia-cusparse-cu12>=12.1.0.106",
-    "nvidia-nccl-cu12>=2.18.1",
-    "nvidia-nvjitlink-cu12>=12.1.105",
-]
-files = [
-    {file = "jax_cuda12_plugin-0.4.36-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:6a0b0c2bdc1da2eea2c20723a1e8f39b3cda67d24c665de936647e8091f5790d"},
-    {file = "jax_cuda12_plugin-0.4.36-cp312-cp312-manylinux2014_x86_64.whl", hash = "sha256:5d4727fb519fedc06a9a984d5a0714804d81ef126a2cb60cefd5cbc4a3ea2627"},
-]
-
-[[package]]
-name = "jax"
-version = "0.4.37"
-extras = ["cuda12"]
-requires_python = ">=3.10"
-summary = "Differentiate, compile, and transform Numpy code."
-groups = ["default"]
-marker = "python_version >= \"3.12\" and python_version < \"3.13\""
-dependencies = [
-    "jax-cuda12-plugin[with_cuda]<=0.4.37,>=0.4.36",
-    "jax==0.4.37",
-    "jaxlib==0.4.36",
-]
-files = [
-    {file = "jax-0.4.37-py3-none-any.whl", hash = "sha256:bdc0686d7e5a944e2d38026eae632214d98dd2d91869cbcedbf1c11298ae3e3e"},
-    {file = "jax-0.4.37.tar.gz", hash = "sha256:7774f3d9e23fe199c65589c680c5a5be87a183b89598421a632d8245222b637b"},
-]
-
 [[package]]
 name = "jaxlib"
 version = "0.4.36"
@ -926,7 +937,7 @@ version = "12.4.5.8"
 requires_python = ">=3"
 summary = "CUBLAS native runtime libraries"
 groups = ["default"]
-marker = "python_version >= \"3.12\" and python_version < \"3.13\""
+marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\" and python_version < \"3.13\""
 files = [
    {file = "nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_aarch64.whl", hash = "sha256:0f8aa1706812e00b9f19dfe0cdb3999b092ccb8ca168c0db5b8ea712456fd9b3"},
    {file = "nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl", hash = "sha256:2fc8da60df463fdefa81e323eef2e36489e1c94335b5358bcb38360adf75ac9b"},
@ -939,26 +950,13 @@ version = "12.4.127"
 requires_python = ">=3"
 summary = "CUDA profiling tools runtime libs."
 groups = ["default"]
-marker = "python_version >= \"3.12\" and python_version < \"3.13\""
+marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\" and python_version < \"3.13\""
 files = [
    {file = "nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:79279b35cf6f91da114182a5ce1864997fd52294a87a16179ce275773799458a"},
    {file = "nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:9dec60f5ac126f7bb551c055072b69d85392b13311fcc1bcda2202d172df30fb"},
    {file = "nvidia_cuda_cupti_cu12-12.4.127-py3-none-win_amd64.whl", hash = "sha256:5688d203301ab051449a2b1cb6690fbe90d2b372f411521c86018b950f3d7922"},
 ]

-[[package]]
-name = "nvidia-cuda-nvcc-cu12"
-version = "12.6.85"
-requires_python = ">=3"
-summary = "CUDA nvcc"
-groups = ["default"]
-marker = "python_version >= \"3.12\" and python_version < \"3.13\""
-files = [
-    {file = "nvidia_cuda_nvcc_cu12-12.6.85-py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:d75d9d74599f4d7c0865df19ed21b739e6cb77a6497a3f73d6f61e8038a765e4"},
-    {file = "nvidia_cuda_nvcc_cu12-12.6.85-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5d2edd5531b13e3daac8ffee9fc2b70a147e6088b2af2565924773d63d36d294"},
-    {file = "nvidia_cuda_nvcc_cu12-12.6.85-py3-none-win_amd64.whl", hash = "sha256:aa04742337973dcb5bcccabb590edc8834c60ebfaf971847888d24ffef6c46b5"},
-]
-
 [[package]]
 name = "nvidia-cuda-nvrtc-cu12"
 version = "12.4.127"
@ -978,7 +976,7 @@ version = "12.4.127"
 requires_python = ">=3"
 summary = "CUDA Runtime native Libraries"
 groups = ["default"]
-marker = "python_version >= \"3.12\" and python_version < \"3.13\""
+marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\" and python_version < \"3.13\""
 files = [
    {file = "nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:961fe0e2e716a2a1d967aab7caee97512f71767f852f67432d572e36cb3a11f3"},
    {file = "nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:64403288fa2136ee8e467cdc9c9427e0434110899d07c779f25b5c068934faa5"},
@ -991,7 +989,7 @@ version = "9.1.0.70"
 requires_python = ">=3"
 summary = "cuDNN runtime libraries"
 groups = ["default"]
-marker = "python_version >= \"3.12\" and python_version < \"3.13\""
+marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\" and python_version < \"3.13\""
 dependencies = [
    "nvidia-cublas-cu12",
 ]
@ -1006,7 +1004,7 @@ version = "11.2.1.3"
 requires_python = ">=3"
 summary = "CUFFT native runtime libraries"
 groups = ["default"]
-marker = "python_version >= \"3.12\" and python_version < \"3.13\""
+marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\" and python_version < \"3.13\""
 dependencies = [
    "nvidia-nvjitlink-cu12",
 ]
@ -1035,7 +1033,7 @@ version = "11.6.1.9"
 requires_python = ">=3"
 summary = "CUDA solver native runtime libraries"
 groups = ["default"]
-marker = "python_version >= \"3.12\" and python_version < \"3.13\""
+marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\" and python_version < \"3.13\""
 dependencies = [
    "nvidia-cublas-cu12",
    "nvidia-cusparse-cu12",
@ -1053,7 +1051,7 @@ version = "12.3.1.170"
 requires_python = ">=3"
 summary = "CUSPARSE native runtime libraries"
 groups = ["default"]
-marker = "python_version >= \"3.12\" and python_version < \"3.13\""
+marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\" and python_version < \"3.13\""
 dependencies = [
    "nvidia-nvjitlink-cu12",
 ]
@ -1069,7 +1067,7 @@ version = "2.21.5"
 requires_python = ">=3"
 summary = "NVIDIA Collective Communication Library (NCCL) Runtime"
 groups = ["default"]
-marker = "python_version >= \"3.12\" and python_version < \"3.13\""
+marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\" and python_version < \"3.13\""
 files = [
    {file = "nvidia_nccl_cu12-2.21.5-py3-none-manylinux2014_x86_64.whl", hash = "sha256:8579076d30a8c24988834445f8d633c697d42397e92ffc3f63fa26766d25e0a0"},
 ]
@ -1080,7 +1078,7 @@ version = "12.4.127"
 requires_python = ">=3"
 summary = "Nvidia JIT LTO Library"
 groups = ["default"]
-marker = "python_version >= \"3.12\" and python_version < \"3.13\""
+marker = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\" and python_version < \"3.13\""
 files = [
    {file = "nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:4abe7fef64914ccfa909bc2ba39739670ecc9e820c83ccc7a6ed414122599b83"},
    {file = "nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:06b3b9b25bf3f8af351d664978ca26a16d2c5127dbd53c0497e28d1fb9611d57"},
@ -1653,6 +1651,29 @@ files = [
    {file = "rich-13.9.4.tar.gz", hash = "sha256:439594978a49a09530cff7ebc4b5c7103ef57baf48d5ea3184f21d9a2befa098"},
 ]

+[[package]]
+name = "sbx-rl"
+version = "0.18.0"
+requires_python = ">=3.8"
+summary = "Jax version of Stable Baselines, implementations of reinforcement learning algorithms."
+groups = ["default"]
+marker = "python_version >= \"3.12\" and python_version < \"3.13\""
+dependencies = [
+    "flax",
+    "jax",
+    "jaxlib",
+    "optax; python_version >= \"3.9.0\"",
+    "optax<0.1.8; python_version < \"3.9.0\"",
+    "rich",
+    "stable-baselines3<3.0,>=2.4.0a4",
+    "tensorflow-probability",
+    "tqdm",
+]
+files = [
+    {file = "sbx_rl-0.18.0-py3-none-any.whl", hash = "sha256:75ade634a33555ad4c4a81523bb0f99c89d1b3bc89fb74990ef87b22379abd9c"},
+    {file = "sbx_rl-0.18.0.tar.gz", hash = "sha256:670f2bf095ec21ba6f8171602294baf0123787fe7be6811ebab276fb5010b8b3"},
+]
+
 [[package]]
 name = "scipy"
 version = "1.14.1"
@ -1695,6 +1716,23 @@ files = [
    {file = "scooby-0.10.0.tar.gz", hash = "sha256:7ea33c262c0cc6a33c6eeeb5648df787be4f22660e53c114e5fff1b811a8854f"},
 ]

+[[package]]
+name = "seaborn"
+version = "0.13.2"
+requires_python = ">=3.8"
+summary = "Statistical data visualization"
+groups = ["default"]
+marker = "python_version >= \"3.12\" and python_version < \"3.13\""
+dependencies = [
+    "matplotlib!=3.6.1,>=3.4",
+    "numpy!=1.24.0,>=1.20",
+    "pandas>=1.2",
+]
+files = [
+    {file = "seaborn-0.13.2-py3-none-any.whl", hash = "sha256:636f8336facf092165e27924f223d3c62ca560b1f2bb5dff7ab7fad265361987"},
+    {file = "seaborn-0.13.2.tar.gz", hash = "sha256:93e60a40988f4d65e9f4885df477e2fdaff6b73a9ded434c1ab356dd57eefff7"},
+]
+
 [[package]]
 name = "setuptools"
 version = "75.6.0"
@ -1809,6 +1847,26 @@ files = [
    {file = "sympy-1.13.1.tar.gz", hash = "sha256:9cebf7e04ff162015ce31c9c6c9144daa34a93bd082f54fd8f12deca4f47515f"},
 ]

+[[package]]
+name = "tensorflow-probability"
+version = "0.25.0"
+requires_python = ">=3.9"
+summary = "Probabilistic modeling and statistical inference in TensorFlow"
+groups = ["default"]
+marker = "python_version >= \"3.12\" and python_version < \"3.13\""
+dependencies = [
+    "absl-py",
+    "cloudpickle>=1.3",
+    "decorator",
+    "dm-tree",
+    "gast>=0.3.2",
+    "numpy>=1.13.3",
+    "six>=1.10.0",
+]
+files = [
+    {file = "tensorflow_probability-0.25.0-py2.py3-none-any.whl", hash = "sha256:f3f4d6431656c0122906888afe1b67b4400e82bd7f254b45b92e6c5b84ea8e3e"},
+]
+
 [[package]]
 name = "tensorstore"
 version = "0.1.71"
@ -1899,6 +1957,21 @@ files = [
    {file = "tornado-6.4.2.tar.gz", hash = "sha256:92bad5b4746e9879fd7bf1eb21dce4e3fc5128d71601f80005afa39237ad620b"},
 ]

+[[package]]
+name = "tqdm"
+version = "4.67.1"
+requires_python = ">=3.7"
+summary = "Fast, Extensible Progress Meter"
+groups = ["default"]
+marker = "python_version >= \"3.12\" and python_version < \"3.13\""
+dependencies = [
+    "colorama; platform_system == \"Windows\"",
+]
+files = [
+    {file = "tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2"},
+    {file = "tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"},
+]
+
 [[package]]
 name = "traitlets"
 version = "5.14.3"
--- a/pyproject.toml
+++ b/pyproject.toml
@ -5,7 +5,7 @@ description = "A solar car racing simulation library and GUI tool"
 authors = [
    {name = "saji", email = "saji@saji.dev"},
 ]
-dependencies = ["pyqtgraph>=0.13.7", "jax[cuda12]>=0.4.37", "pytest>=8.3.3", "pyside6>=6.8.0.2", "matplotlib>=3.9.2", "gymnasium[jax]>=1.0.0", "pyvista>=0.44.2", "pyvistaqt>=0.11.1", "stable-baselines3>=2.4.0"]
+dependencies = ["pyqtgraph>=0.13.7", "jax>=0.4.37", "pytest>=8.3.3", "pyside6>=6.8.0.2", "matplotlib>=3.9.2", "gymnasium[jax]>=1.0.0", "pyvista>=0.44.2", "pyvistaqt>=0.11.1", "stable-baselines3>=2.4.0", "gymnax>=0.0.8", "sbx-rl>=0.18.0"]
 requires-python = ">=3.10,<3.13"
 readme = "README.md"
 license = {text = "MIT"}
--- a/report/report.tex
+++ b/report/report.tex
@ -0,0 +1,136 @@
+\documentclass[11pt]{article}
+
+% Essential packages
+\usepackage[utf8]{inputenc}
+\usepackage[T1]{fontenc}
+\usepackage{amsmath,amssymb}
+\usepackage{graphicx}
+\usepackage[margin=1in]{geometry}
+\usepackage{hyperref}
+\usepackage{algorithm}
+\usepackage{algorithmic}
+\usepackage{float}
+\usepackage{booktabs}
+\usepackage{caption}
+\usepackage{subcaption}
+
+% Custom commands
+\newcommand{\sectionheading}[1]{\noindent\textbf{#1}}
+
+% Title and author information
+\title{\Large{Your Project Title: A Study in Optimal Control and Reinforcement Learning}}
+\author{Your Name\\
+        Course Name\\
+        Institution}
+\date{\today}
+
+\begin{document}
+
+\maketitle
+
+\begin{abstract}
+Solar Racing is a competition with the goal of creating highly efficient solar-assisted electric vehicles. Effective solar racing
+requires awareness and complex decision making to determine optimal speeds to exploit the environmental conditions, such as winds,
+cloud cover, and changes in elevation. We present an environment modelled on the dynamics involved for a race, including generated
+elevation and wind profiles. The model uses the \texttt{gymnasium} interface to allow it to be used by a variety of algorithms.
+We demonstrate a method of designing reward functions for multi-objective problems. The environment shows to be solvable by modern
+reinforcement learning algorithms.
+\end{abstract}
+
+\section{Introduction}
+Start with a broad context of your problem area in optimal control/reinforcement learning. Then narrow down to your specific focus. Include:
+\begin{itemize}
+    \item Problem motivation
+    \item Brief overview of existing approaches
+    \item Your specific contributions
+    \item Paper organization
+\end{itemize}
+
+Solar racing was invented in the early 90s as a technology incubator for high-efficiency motor vehicles. The first solar races were speed
+focused, however a style of race that focused on minimal energy use within a given route was developed to push focus towards vehicle efficiency.
+The goal of these races is to arrive at a destination within a given time frame, while using as little grid (non-solar) energy as possible.
+Aerodynamic drag is one of the most significant sources of energy consumption, along with elevation changes. The simplest policy to meet
+the constraints of on-time arrival is:
+$$
+V_{\text{avg}} = \frac{D}{T}
+$$
+Where $D$ is the distance needed to travel, and $T$ is the maximum allowed time.
+
+\section{Background}
+Provide necessary background on:
+\begin{itemize}
+    \item Your specific application domain
+    \item Relevant algorithms and methods
+    \item Previous work in this area
+\end{itemize}
+
+\section{Methodology}
+Describe your approach in detail:
+\begin{itemize}
+    \item Problem formulation
+    \item Algorithm description
+    \item Implementation details
+\end{itemize}
+
+% Example of how to include an algorithm
+\begin{algorithm}[H]
+\caption{Your Algorithm Name}
+\begin{algorithmic}[1]
+\STATE Initialize parameters
+\WHILE{not converged}
+    \STATE Update step
+\ENDWHILE
+\RETURN Result
+\end{algorithmic}
+\end{algorithm}
+
+\section{Experiments and Results}
+Present your findings:
+\begin{itemize}
+    \item Experimental setup
+    \item Results and analysis
+    \item Comparison with baselines (if applicable)
+\end{itemize}
+
+% Example of how to include figures
+\begin{figure}[H]
+\centering
+\caption{Description of your figure}
+\label{fig:example}
+\end{figure}
+
+% Example of how to include tables
+\begin{table}[H]
+\centering
+\caption{Your Table Caption}
+\begin{tabular}{lcc}
+\toprule
+Method & Metric 1 & Metric 2 \\
+\midrule
+Approach 1 & Value & Value \\
+Approach 2 & Value & Value \\
+\bottomrule
+\end{tabular}
+\label{tab:results}
+\end{table}
+
+\section{Discussion}
+Analyze your results:
+\begin{itemize}
+    \item Interpretation of findings
+    \item Limitations and challenges
+    \item Potential improvements
+\end{itemize}
+
+\section{Conclusion}
+Summarize your work:
+\begin{itemize}
+    \item Key contributions
+    \item Practical implications
+    \item Future work directions
+\end{itemize}
+
+\bibliography{references}
+\bibliographystyle{plain}
+
+\end{document}
--- a/src/solarcarsim/physsim.py
+++ b/src/solarcarsim/physsim.py
@ -115,7 +115,6 @@ def bldc_power_draw(torque, velocity, params: MotorParams):
    return total_power


-# @partial(jit, static_argnames=['resistance', 'kt', 'kv', 'vmax', 'Cf'])
@jit
 def bldc_torque(velocity, current_limit, resistance, kt, kv, vmax, Cf):
    bemf = velocity / kv
@ -132,7 +131,6 @@ def bldc_torque(velocity, current_limit, resistance, kt, kv, vmax, Cf):
@partial(
    jit,
    static_argnums=(
-        1,
        2,
    ),
 )
@ -144,47 +142,12 @@ def battery_powerloss(current, cell_r, battery_shape: Tuple[int, int]):
    return jnp.sum(cell_Ploss)


-def forward(state, timestep, control, params: CarParams):
-    # state is (position, time, energy)
-    # control is velocity
-    # timestep is >0 time to advance
-    # params is the params dictionary.
-    # returns the next state with (position', time + timestep, energy')
-    # TODO: terrain, weather, solar
-
-    # determine the forces acting on the car.
-    dragf = drag_force(control, params.frontal_area, params.drag_coeff, 1.184)
-    rollf = rolling_force(params.mass, 0, params.rolling_coeff)
-    hillforce = downslope_force(params.mass, 0)
-    totalf = dragf + rollf + hillforce
-    # determine the power needed to make this force
-    tau = params.wheel_radius * totalf
-    pdraw = bldc_power_draw(tau, control, params.motor)
-    net_power = 0 - pdraw  # watts aka j/s
-
-    # TODO: calculate battery-based power losses.
-    # TODO: support regenerative braking when going downhill
-    # TODO: delta x = cos(theta) * velocity * timestep
-
-    new_state = jnp.array(
-        [
-            state[0] + control * timestep,
-            state[1] + timestep,
-            state[2] + net_power * timestep,
-        ]
-    )
-    return new_state
-
-
 def make_environment(seed):
    """Generate a race environment: terrain function, wind function, wrapped forward function."""
-    key, subkey = jax.random.split(seed)
-    wind = generate_wind_field(subkey, 10000, 600, spatial_scale=1000)
-    key, subkey = jax.random.split(key)
-    slope = fractal_noise_1d(subkey, 10000, scale=1200, height_scale=0.08)
+    windkey, slopekey = jax.random.split(seed, 2)
+    wind = generate_wind_field(windkey, 10000, 600, spatial_scale=1000)
+    slope = fractal_noise_1d(slopekey, 10000, scale=1200, height_scale=0.08)
    elevation = jnp.cumsum(slope)
-    # elevation = generate_elevation_profile(subkey, 10000, height_variation=40.0, scale=1200, octaves=5)
-    # slope = jnp.arctan(jnp.diff(elevation, prepend=100.0)) # rise/run

    return wind, elevation, slope

--- a/src/solarcarsim/simv1.py
+++ b/src/solarcarsim/simv1.py
@ -4,11 +4,9 @@ import jax
 import jax.numpy as jnp
 from jax import jit
 from functools import partial
-from solarcarsim.physsim import drag_force, rolling_force, downslope_force, bldc_power_draw
-

@partial(jit, static_argnames=["params"])
-def forwardv2(state, control, delta_time, wind, elevation, slope, params):
+def forward(state, control, delta_time, wind, elevation, slope, params: sim.CarParams):
    pos = jnp.astype(jnp.round(state[0]), "int32")
    time = jnp.astype(jnp.round(state[1]), "int32")
    theta = slope[pos]
@ -23,23 +21,22 @@ def forwardv2(state, control, delta_time, wind, elevation, slope, params):
    totalf = dragf + rollf + hillforce
    # with the sum of forces, determine the needed torque at the wheels, and then power
    tau = params.wheel_radius * totalf
-    pdraw = bldc_power_draw(tau, velocity, params.motor)
+    pdraw = sim.bldc_power_draw(tau, velocity, params.motor)
    # determine the energy needed to do this power for the time step
    net_power = state[2] - delta_time * pdraw  # joules

-    dpos = jnp.cos(theta) * velocity * delta_time
-    dist_remaining = 10000.0 - dpos
+    dpos = state[0] + jnp.cos(theta) * velocity * delta_time
+    new_pos = jnp.maximum(dpos, 0)
+    dist_remaining = 10000.0 - (state[0] + dpos)
    time_remaining = 600 - (state[1] + delta_time)
    return jnp.array(
-        [dpos, state[1] + delta_time, net_power, dist_remaining, time_remaining]
+        [new_pos, state[1] + delta_time, net_power, dist_remaining, time_remaining]
    )


-def reward(state, prev_energy):
-    progress = state[0] / 10000 * 100
-    energy_usage = 10 * (state[2] - prev_energy) # current energy < previous energy.
-    time_factor = (1.0 - (state[1] / 600)) * 50
-    reward = progress + energy_usage + time_factor
+def reward(state, prev_state):
+    reward = 0
+    reward += state[0]/8000
    return reward

 class SolarRaceV1(gym.Env):
@ -53,7 +50,6 @@ class SolarRaceV1(gym.Env):
        # self._state = jnp.array([np.array([x], dtype="float32") for x in (0,0,0, 10000.0, 600.0)])
        self._state = jnp.array([[0],[0],[0],[10000.0], [600.0]])
        # self._state = jnp.array([0, 0,0,10000.0, 600.0])
-    
    def _vision_function(self):
        # extract the vision results.
        def slookup(x):
@ -81,7 +77,7 @@ class SolarRaceV1(gym.Env):
        self._reset_sim(jax.random.key(seed))
        self._timestep = timestep
        self._car = car
-        self._simstep = forwardv2
+        self._simstep = forward
        self._simreward = reward

        self.observation_space = gym.spaces.Dict(
@ -108,15 +104,20 @@ class SolarRaceV1(gym.Env):
    def step(self, action):
        wind, elevation, slope = self._environment

-        old_energy = self._state[2]
+        old_state = self._state

        self._state = self._simstep(self._state, action, self._timestep,wind, elevation, slope, self._car)
-        reward = self._simreward(self._state, old_energy)[0]
+        reward = self._simreward(self._state, old_state)[0]
        terminated = False
        truncated = False
-        if jnp.all(self._state[0] > 10000):
+        if jnp.all(self._state[0] > 8000):
+            reward += 500
            terminated = True
-        if self._state[1] > 600:
+            # we want the time to be as close to 600 as possible
+            reward -= 600 - self._state[1][0]
+            reward += 1e-6 * (self._state[2][0]) # net energy is negative.
+        if jnp.all(self._state[1] > 600):
+            reward -= 50000
            truncated = True

        return self._get_obs(), reward, terminated, truncated, {}
--- a/src/solarcarsim/simv2.py
+++ b/src/solarcarsim/simv2.py
@ -1,14 +1,190 @@
-""" Second-generation simulator. More functional, cleaner code, faster """
+"""Second-generation simulator. More functional, cleaner code, faster"""

-from typing import NamedTuple
+from typing import NamedTuple, Optional, Tuple, Union, Dict, Any
 import jax
 import jax.numpy as jnp
+import chex
+from flax import struct
+from jax import lax
+from gymnax.environments import environment
+from gymnax.environments import spaces
+
+from solarcarsim.physsim import CarParams, fractal_noise_1d
+import solarcarsim.physsim as sim


-class SimState(NamedTuple):
-    position: float
-    time: float
-    energy: float
-    distance_remaining: float
-    time_remaining: float
+@struct.dataclass
+class SimState(environment.EnvState):
+    position: jnp.ndarray
+    velocity: jnp.ndarray
+    realtime: jnp.ndarray
+    energy: jnp.ndarray
+    # distance_remaining: jnp.ndarray
+    # time_remaining: jnp.ndarray
+    slope: jnp.ndarray

+
+@struct.dataclass
+class SimParams(environment.EnvParams):
+    car: CarParams = CarParams()
+    goal_time: int = 600
+    goal_dist: int = 8000
+    map_size: int = 10000
+    time_step: float = 1.0
+    terrain_lookahead: int = 100
+    # skip wind for now
+
+
+class Snax(environment.Environment[SimState, SimParams]):
+    """JAX version of the solar race simulator"""
+
+    @property
+    def default_params(self) -> SimParams:
+        return SimParams()
+
+    def action_space(self, params: Optional[SimParams] = None):
+        return spaces.Box(low=-1.0, high=1.0, shape=(1,))
+
+    def observation_space(self, params: Optional[SimParams] = None) -> spaces.Box:
+        if params is None:
+            params = self.default_params
+        # needs to be a box. it will be [pos, time, energy, dist_to_goal, time_remaining, terrain0, terrain1]
+        shape = 5 + params.terrain_lookahead
+        low = jnp.array(
+            [0, 0, -1e11, 0, 0] + [-1.0] * params.terrain_lookahead, dtype=jnp.float32
+        )
+        high = jnp.array(
+            [params.map_size, params.goal_time, 0, params.goal_dist, params.goal_time]
+            + [1.0] * params.terrain_lookahead,
+            dtype=jnp.float32,
+        )
+        return spaces.Box(low, high, shape=(shape,))
+        # return spaces.Dict(
+        #     {
+        #         "position": spaces.Box(0.0, params.map_size, (), jnp.float32),
+        #         "realtime": spaces.Box(0.0, params.goal_time + 100, (), jnp.float32),
+        #         "energy": spaces.Box(-1e11, 0.0, (), jnp.float32),
+        #         "dist_to_goal": spaces.Box(0.0, params.goal_dist, (), jnp.float32),
+        #         "time_remaining": spaces.Box(0.0, params.goal_time, (), jnp.float32),
+        #         "upcoming_terrain": spaces.Box(
+        #             -1.0, 1.0, shape=(100,), dtype=jnp.float32
+        #         ),
+        #         # skip wind for now
+        #     }
+        # )
+
+    def state_space(self, params: Optional[SimParams] = None) -> spaces.Dict:
+        if params is None:
+            params = self.default_params
+        return spaces.Dict(
+            {
+                "position": spaces.Box(0.0, params.map_size, (), jnp.float32),
+                "realtime": spaces.Box(0.0, params.goal_time + 100, (), jnp.float32),
+                "energy": spaces.Box(-1e11, 0.0, (), jnp.float32),
+                # "dist_to_goal": spaces.Box(0.0, params.goal_dist, (), jnp.float32),
+                # "time_remaining": spaces.Box(0.0, params.goal_time, (), jnp.float32),
+                "slope": spaces.Box(
+                    -1.0, 1.0, shape=(params.map_size,), dtype=jnp.float32
+                ),
+                "time": spaces.Discrete(int(params.goal_time / params.time_step)),
+            }
+        )
+
+    def reset_env(
+        self, key: chex.PRNGKey, params: Optional[SimParams] = None
+    ) -> Tuple[chex.Array, SimState]:
+        if params is None:
+            params = self.default_params
+        slope = fractal_noise_1d(key, 10000, scale=1200, height_scale=0.08)
+        init_state = SimState(
+            position=jnp.array(0.0),
+            velocity=jnp.array(0.0),
+            time=0,
+            realtime=jnp.array(0.0),
+            energy=jnp.array(0.0),
+            # distance_remaining=jnp.array(params.goal_dist),
+            # time_remaining=jnp.array(params.goal_time),
+            slope=slope,
+        )
+        return self.get_obs(init_state, key, params), init_state
+
+    def get_obs(
+        self, state: SimState, key: chex.PRNGKey, params: SimParams
+    ) -> chex.Array:
+        if params is None:
+            params = self.default_params
+
+        # get rounded position from state
+        pos_int = jnp.astype(state.position, jnp.int32)
+
+        terrain_view = jax.lax.dynamic_slice(state.slope, (pos_int,), (100,))
+        dist_to_goal = jnp.abs(params.goal_dist - state.position)
+        time_remaining = jnp.abs(params.goal_time - state.realtime)
+        main_state = jnp.array(
+            [state.position, state.realtime, state.energy, dist_to_goal, time_remaining]
+        )
+        return jnp.concat([main_state, terrain_view]).squeeze()
+
+    def step_env(
+        self,
+        key: chex.PRNGKey,
+        state: SimState,
+        action: Union[int, float, chex.Array],
+        params: SimParams,
+    ) -> Tuple[chex.Array, SimState, jnp.ndarray, jnp.ndarray, Dict[Any, Any]]:
+        pos = jnp.astype(state.position, jnp.int32)
+        theta = state.slope[pos]
+        velocity = jnp.array([action * params.car.max_speed]).squeeze()
+        dragf = sim.drag_force(
+            velocity, params.car.frontal_area, params.car.drag_coeff, 1.184
+        )
+        rollf = sim.rolling_force(params.car.mass, theta, params.car.rolling_coeff)
+        hillf = sim.downslope_force(params.car.mass, theta)
+        total_f = dragf + rollf + hillf
+        tau = params.car.wheel_radius * total_f / params.car.n_motors
+        p_draw = (
+            sim.bldc_power_draw(tau, velocity, params.car.motor) * params.car.n_motors
+        )
+
+        new_energy = state.energy - params.time_step * p_draw
+        new_position = state.position + jnp.cos(theta) * velocity * params.time_step
+        new_state = SimState(
+            position=new_position.squeeze(),
+            velocity=velocity.squeeze(),
+            realtime=state.realtime + params.time_step,
+            energy=new_energy.squeeze(),
+            slope=state.slope,
+            time=state.time + 1,
+        )
+
+        # compute reward
+        # reward = new_state.position / params.goal_dist
+        # if new_state.position >= params.goal_dist:
+        #     reward += 100
+        #     reward += params.goal_time - new_state.realtime
+        #     # penalize energy use
+        #     reward += 1e-7 * new_state.energy  # energy is negative
+        # if (
+        #     new_state.realtime >= params.goal_time
+        #     or new_state.time > params.max_steps_in_episode
+        # ):
+        #     reward -= 500
+
+        # we have to vectorize that.
+        reward = new_state.position / params.goal_dist + \
+                (new_state.position >= params.goal_dist) * (100 + params.goal_time - new_state.realtime + 1e-7*new_state.energy) + \
+                (new_state.realtime >= params.goal_time) * -500
+        reward = reward.squeeze()
+        terminal = self.is_terminal(state, params)
+        return (
+            lax.stop_gradient(self.get_obs(new_state, key, params)),
+            lax.stop_gradient(new_state),
+            reward,
+            terminal,
+            {},
+        )
+
+    def is_terminal(self, state: SimState, params: SimParams) -> jnp.ndarray:
+        finish = state.position >= params.goal_dist
+        timeout = state.time >= params.max_steps_in_episode
+        return jnp.logical_or(finish, timeout).squeeze()