diff --git a/pufferlib/config/ocean/orbital_dock.ini b/pufferlib/config/ocean/orbital_dock.ini new file mode 100644 index 0000000000..7e1510b5bc --- /dev/null +++ b/pufferlib/config/ocean/orbital_dock.ini @@ -0,0 +1,76 @@ +[base] +package = ocean +env_name = puffer_orbital_dock +policy_name = OrbitalDock +rnn_name = Recurrent + +[policy] +hidden_size = 256 + +[rnn] +input_size = 256 +hidden_size = 256 + +[vec] +num_envs = 4 + +[env] +num_envs = 1024 +mu = 3.986e14 +# GEO orbit: R = 42,164 km +station_radius = 42164000.0 +dt = 1.0 +max_thrust = 10.0 +mass = 500.0 +fuel_budget = 100.0 +max_steps = 2500 +# Docking point ([0, 60, 0] in LVLH) +dock_x = 0.0 +dock_y = 60.0 +dock_z = 0.0 +# Docking thresholds (10m, 2 m/s) +dock_dist = 10.0 +dock_speed = 2.0 +# Speed annealing: dock_speed_start -> dock_speed over anneal_steps per-env steps +dock_speed_start = 10.0 +anneal_steps = 50000 +# LOS cone (60 deg total, 800m extent) +los_angle = 60.0 +los_extent = 800.0 +# Initial conditions +init_x_center = 0.0 +init_y_center = 800.0 +init_z_center = 0.0 +init_x_range = 400.0 +init_y_range = 300.0 +init_z_range = 400.0 + +[train] +learning_rate = 0.001 +# 4096 total agents × 16 bptt = 65536 +batch_size = 65536 +minibatch_size = 4096 +update_epochs = 2 +gamma = 0.998 +gae_lambda = 0.98 +clip_coef = 0.06 +ent_coef = 0.005 +vf_coef = 0.5 +vf_clip_coef = 0.5 +max_grad_norm = 0.5 +anneal_lr = true +min_lr_ratio = 0.1 +total_timesteps = 500_000_000 +checkpoint_interval = 200 +bptt_horizon = 16 +use_rnn = false + +[sweep] +downsample = 0 + +[sweep.train.total_timesteps] +distribution = log_normal +min = 5e7 +max = 2e8 +mean = 1e8 +scale = time diff --git a/pufferlib/ocean/environment.py b/pufferlib/ocean/environment.py index 6c56a4ea20..612b921e90 100644 --- a/pufferlib/ocean/environment.py +++ b/pufferlib/ocean/environment.py @@ -162,6 +162,7 @@ def make_multiagent(buf=None, **kwargs): 'spaces': make_spaces, 'multiagent': make_multiagent, 'slimevolley': 'SlimeVolley', + 'orbital_dock': 'OrbitalDock', } def env_creator(name='squared', *args, **kwargs): diff --git a/pufferlib/ocean/orbital_dock/README.md b/pufferlib/ocean/orbital_dock/README.md new file mode 100644 index 0000000000..b81ee53eec --- /dev/null +++ b/pufferlib/ocean/orbital_dock/README.md @@ -0,0 +1,71 @@ +# Orbital Dock + +Spacecraft rendezvous and docking in the LVLH (Hill) reference frame. A chaser spacecraft must navigate 400–1200m to reach a docking port located 60m along the station's V-bar (prograde) axis. The approach is constrained to a 60° line-of-sight cone centered on the docking axis — the chaser cannot dock from arbitrary directions but must align with the V-bar corridor. A hard speed gate (2.0 m/s) enforces a controlled final approach. The physics use Clohessy-Wiltshire linearized relative motion with RK4 integration — the standard model for proximity operations in circular orbit. + +PPO achieves a **99.5% dock rate** against a 2.0 m/s speed gate at 500M training steps. + +## Physics + +- **Dynamics:** Clohessy-Wiltshire (CW) equations, RK4 integration, dt = 1s +- **Orbit:** GEO (42,164 km altitude), mean motion n = 7.29e-5 rad/s +- **Thrust:** 10N max per axis, 500 kg chaser, max accel = 0.02 m/s² +- **Fuel:** 100 m/s delta-v budget +- **Frame:** LVLH — R-bar (radial), V-bar (prograde), H-bar (normal) + +## Observations (10D) + +| Index | Description | +|-------|-------------| +| 0-2 | Position [x, y, z] in LVLH (meters) | +| 3-5 | Velocity [vx, vy, vz] in LVLH (m/s) | +| 6 | Distance to dock point (m) | +| 7 | Speed (m/s) | +| 8 | Closing velocity (m/s, positive = approaching) | +| 9 | Time remaining (fraction) | + +## Actions + +Continuous `Box([-1, 1], shape=(3,))` — thrust fraction per LVLH axis (R-bar, V-bar, H-bar). + +## Termination + +- **Dock:** in LOS cone AND dist < 10m AND speed < 2.0 m/s +- **Crash:** y < dock_y - 5m (overshoot past dock point) +- **Timeout:** 2500 steps + +## Reward + +Per-step: +- `+0.01 * (prev_dist - dist)` — distance progress +- `+0.1 * (exp(-dist/20) - exp(-prev_dist/20))` — potential-based proximity +- `-0.005 * exp(-dist/30) * speed²` — proximity-weighted braking +- `-0.005 * (act²)` — control cost +- `-0.005` — time penalty + +Terminal: dock = +0.5 to +1.0 (soft speed bonus), crash = -1.0, timeout = -0.5 + +## Reward Shaping + +**Exponential proximity reward.** With linear distance progress (`prev_dist - dist`) over long distances, the agent learns to approach but won't risk crashing. The crash penalty outweighs the marginal progress reward near the dock, so the agent avoids docking. An exponential proximity bonus (`exp(-dist/20)`) pulls the agent to explore through the final approach. + +**Potential-based formulation.** A naive proximity bonus (`+0.1 * exp(-dist/20)`) is farmable — the agent can park at ~20m from the dock and collect +0.02/step indefinitely, earning more per episode than the +1.0 dock terminal reward. The potential-based version (`exp(-dist/20) - exp(-prev_dist/20)`) only rewards *movement toward* the dock. Hovering pays zero. + +**Proximity-weighted braking** (`exp(-dist/30) * speed²`) solves the final piece: speed control. Global velocity penalties create a "50m wall" where the agent learns to stop far from the dock to avoid the per-step speed cost. Weighting by proximity focuses the braking signal where it matters — at 800m the penalty is negligible, at 30m it's moderate, at 10m it's strong. The agent cruises at full speed for most of the approach and only brakes in the final stretch. + +**Velocity annealing** Annealing the velocity requirements for a successful dock (from 10 → 2 m/s over 50K per-env steps) bootstraps the dock reward signal. Without annealing, the agent never experiences docking and can't learn the goal. Short annealing gives brief exposure to successful trajectories, with the braking reward pushing gradients as the task becomes more difficult. + +## Architecture + +- Actor: 2-layer MLP (10 → 256 → 256 → 3), GELU, logstd init = -1.0 +- Critic: separate 2-layer MLP (10 → 256 → 256 → 1) +- RunningNorm on observations, no RNN +- 664.8K parameters + +Training: lr=0.001, clip=0.06, ent=0.005, epochs=2, gamma=0.998, gae=0.98, 1024 envs, batch=65536, minibatch=4096, 500M steps. + +## Files + +- `orbital_dock.h` — C environment (CW dynamics, reward, physics) +- `orbital_dock.py` — Python PufferEnv wrapper +- `binding.c` — C-Python binding +- `render.h` — Raylib 3D visualization \ No newline at end of file diff --git a/pufferlib/ocean/orbital_dock/binding.c b/pufferlib/ocean/orbital_dock/binding.c new file mode 100644 index 0000000000..2d49a139f7 --- /dev/null +++ b/pufferlib/ocean/orbital_dock/binding.c @@ -0,0 +1,55 @@ +#include "orbital_dock.h" +#include "render.h" + +#define Env OrbitalDock +#include "../env_binding.h" + +static int my_init(Env* env, PyObject* args, PyObject* kwargs) { + env->client = NULL; + + // Physics parameters + env->mu = unpack(kwargs, "mu"); + env->station_radius = unpack(kwargs, "station_radius"); + env->dt = unpack(kwargs, "dt"); + env->max_thrust = unpack(kwargs, "max_thrust"); + env->mass = unpack(kwargs, "mass"); + env->fuel_budget = unpack(kwargs, "fuel_budget"); + env->max_steps = (int)unpack(kwargs, "max_steps"); + + // Docking point ([0, 60, 0]) + env->dock_x = unpack(kwargs, "dock_x"); + env->dock_y = unpack(kwargs, "dock_y"); + env->dock_z = unpack(kwargs, "dock_z"); + env->dock_dist = unpack(kwargs, "dock_dist"); + env->dock_speed = unpack(kwargs, "dock_speed"); + env->dock_speed_start = unpack(kwargs, "dock_speed_start"); + env->anneal_steps = (int)unpack(kwargs, "anneal_steps"); + env->global_step = 0; + + // LOS cone + double los_angle_deg = unpack(kwargs, "los_angle"); + env->los_half_angle = (los_angle_deg / 2.0) * M_PI / 180.0; + env->los_extent = unpack(kwargs, "los_extent"); + + // Initial condition ranges + env->init_x_center = unpack(kwargs, "init_x_center"); + env->init_y_center = unpack(kwargs, "init_y_center"); + env->init_z_center = unpack(kwargs, "init_z_center"); + env->init_x_range = unpack(kwargs, "init_x_range"); + env->init_y_range = unpack(kwargs, "init_y_range"); + env->init_z_range = unpack(kwargs, "init_z_range"); + + return 0; +} + +static int my_log(PyObject* dict, Log* log) { + assign_to_dict(dict, "episode_return", log->episode_return); + assign_to_dict(dict, "episode_length", log->episode_length); + assign_to_dict(dict, "dock_success", log->dock_success); + assign_to_dict(dict, "crash_rate", log->crash_rate); + assign_to_dict(dict, "timeout_rate", log->timeout_rate); + assign_to_dict(dict, "fuel_used", log->fuel_used); + assign_to_dict(dict, "final_distance", log->final_distance); + assign_to_dict(dict, "final_rel_speed", log->final_rel_speed); + return 0; +} diff --git a/pufferlib/ocean/orbital_dock/orbital_dock.c b/pufferlib/ocean/orbital_dock/orbital_dock.c new file mode 100644 index 0000000000..cc5078a2be --- /dev/null +++ b/pufferlib/ocean/orbital_dock/orbital_dock.c @@ -0,0 +1,75 @@ +// Standalone C demo for orbital_dock environment +// Compile using: ./scripts/build_ocean.sh orbital_dock [local|fast] +// Run with: ./orbital_dock + +#include "orbital_dock.h" +#include "render.h" +#include + +void generate_random_actions(OrbitalDock *env) { + env->actions[0] = ((float)rand() / (float)RAND_MAX) * 2.0f - 1.0f; + env->actions[1] = ((float)rand() / (float)RAND_MAX) * 2.0f - 1.0f; + env->actions[2] = ((float)rand() / (float)RAND_MAX) * 2.0f - 1.0f; +} + +int main() { + srand(time(NULL)); + + OrbitalDock *env = calloc(1, sizeof(OrbitalDock)); + + // Physics parameters (GEO orbit) + env->mu = 3.986e14; + env->station_radius = 42164e3; + env->dt = 1.0; + env->max_thrust = 10.0; + env->mass = 500.0; + env->fuel_budget = 100.0; + env->max_steps = 2500; + + // Docking point + env->dock_x = 0.0; + env->dock_y = 60.0; + env->dock_z = 0.0; + env->dock_dist = 10.0; + env->dock_speed = 2.0; + env->dock_speed_start = 10.0; + env->anneal_steps = 50000; + env->global_step = 0; + + // LOS cone (60 deg total) + env->los_half_angle = 30.0 * M_PI / 180.0; + env->los_extent = 800.0; + + // Initial condition ranges + env->init_x_center = 0.0; + env->init_y_center = 800.0; + env->init_z_center = 0.0; + env->init_x_range = 400.0; + env->init_y_range = 300.0; + env->init_z_range = 400.0; + + // Allocate buffers + env->observations = (float *)calloc(10, sizeof(float)); + env->actions = (float *)calloc(3, sizeof(float)); + env->rewards = (float *)calloc(1, sizeof(float)); + env->terminals = (unsigned char *)calloc(1, sizeof(unsigned char)); + env->client = NULL; + + c_reset(env); + c_render(env); + + while (!WindowShouldClose()) { + generate_random_actions(env); + c_step(env); + c_render(env); + } + + c_close(env); + free(env->observations); + free(env->actions); + free(env->rewards); + free(env->terminals); + free(env); + + return 0; +} diff --git a/pufferlib/ocean/orbital_dock/orbital_dock.h b/pufferlib/ocean/orbital_dock/orbital_dock.h new file mode 100644 index 0000000000..13033bf495 --- /dev/null +++ b/pufferlib/ocean/orbital_dock/orbital_dock.h @@ -0,0 +1,373 @@ +#ifndef ORBITAL_DOCK_H +#define ORBITAL_DOCK_H + +#include +#include +#include +#include +#include "raylib.h" + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +// Colors for rendering +static const Color PUFF_CYAN = {0, 187, 187, 255}; +static const Color PUFF_WHITE = {241, 241, 241, 255}; +static const Color PUFF_BACKGROUND = {6, 24, 24, 255}; +static const Color PUFF_YELLOW = {255, 200, 0, 255}; +static const Color PUFF_RED = {187, 0, 0, 255}; +static const Color PUFF_ORANGE = {255, 128, 0, 255}; + +// ============================================================================ +// Log Struct (all floats, ending with n) +// ============================================================================ + +typedef struct { + float episode_return; + float episode_length; + float dock_success; + float crash_rate; + float timeout_rate; + float fuel_used; + float final_distance; + float final_rel_speed; + float n; // Required as last field +} Log; + +// Forward declaration — full definition in render.h +typedef struct Client Client; + +// ============================================================================ +// Environment Struct +// ============================================================================ + +typedef struct { + Log log; // Required field (first) + float* observations; // Required field - 10 floats [x, y, z, vx, vy, vz, dist, speed, closing_vel, time_remaining] + float* actions; // Required field - 3 floats (thrust fractions [-1,1]) + float* rewards; // Required field + unsigned char* terminals; // Required field + + // CW state in LVLH frame (meters, m/s) + double x, y, z; // Position relative to station + double vx, vy, vz; // Velocity relative to station + double prev_dist; // Previous distance to dock (for progress reward) + + // CW orbital parameter + double n; // Mean motion = sqrt(mu/R^3) (rad/s) + + // Environment state + double fuel; // Remaining delta-v (m/s) + int step_count; + int max_steps; + + // Physics config + double mu; // Gravitational parameter (m^3/s^2) + double station_radius; // Station orbit radius (m) + double max_thrust; // Max thrust per axis (N) + double mass; // Chaser mass (kg) + double dt; // Timestep (s) + double fuel_budget; // Total delta-v budget (m/s) + + // Docking conditions (dock at [0, 60, 0]) + double dock_x, dock_y, dock_z; // Docking point in LVLH + double dock_dist; // Docking distance threshold (m) + double dock_speed; // Final docking speed threshold (m/s) + double dock_speed_start; // Starting speed threshold for annealing (m/s) + int anneal_steps; // Per-env steps to anneal dock_speed (0 = no annealing) + int global_step; // Persistent step counter (never resets) + + // LOS cone (60 deg total, 800m extent along +y) + double los_half_angle; // Half-angle in radians + double los_extent; // Max extent along y-axis (m) + + // Initial condition ranges + double init_x_center, init_y_center, init_z_center; + double init_x_range, init_y_range, init_z_range; + + // Render client (NULL if not rendering) + Client *client; +} OrbitalDock; + +static inline double rndf(void) { + return (double)rand() / (double)RAND_MAX; +} + +static inline double rndf_range(double min, double max) { + return min + rndf() * (max - min); +} + +// ============================================================================ +// CW Dynamics: Clohessy-Wiltshire Linear Relative Motion +// ============================================================================ + +// CW derivatives: dx/dt = f(state, thrust_accel) +// state = [x, y, z, vx, vy, vz] +// accel = [ax, ay, az] (thrust/mass in LVLH) +static void cw_derivatives(double n, const double state[6], + const double accel[3], double deriv[6]) { + double x = state[0], y = state[1], z = state[2]; + double vx = state[3], vy = state[4], vz = state[5]; + + deriv[0] = vx; + deriv[1] = vy; + deriv[2] = vz; + deriv[3] = 3.0*n*n*x + 2.0*n*vy + accel[0]; // radial + deriv[4] = -2.0*n*vx + accel[1]; // along-track + deriv[5] = -n*n*z + accel[2]; // cross-track +} + +// RK4 integration of CW equations over one timestep +static void cw_step_rk4(OrbitalDock* env, double ax, double ay, double az) { + double dt = env->dt; + double n = env->n; + double accel[3] = {ax, ay, az}; + + double s[6] = {env->x, env->y, env->z, env->vx, env->vy, env->vz}; + double k1[6], k2[6], k3[6], k4[6], tmp[6]; + + // k1 + cw_derivatives(n, s, accel, k1); + + // k2 + for (int i = 0; i < 6; i++) tmp[i] = s[i] + 0.5*dt*k1[i]; + cw_derivatives(n, tmp, accel, k2); + + // k3 + for (int i = 0; i < 6; i++) tmp[i] = s[i] + 0.5*dt*k2[i]; + cw_derivatives(n, tmp, accel, k3); + + // k4 + for (int i = 0; i < 6; i++) tmp[i] = s[i] + dt*k3[i]; + cw_derivatives(n, tmp, accel, k4); + + // Update state + for (int i = 0; i < 6; i++) { + s[i] += (dt / 6.0) * (k1[i] + 2.0*k2[i] + 2.0*k3[i] + k4[i]); + } + + env->x = s[0]; env->y = s[1]; env->z = s[2]; + env->vx = s[3]; env->vy = s[4]; env->vz = s[5]; +} + +// ============================================================================ +// LOS Cone Check (60 deg total, along +y from docking point) +// ============================================================================ + +static int in_los(OrbitalDock* env) { + // Vector from docking point to chaser + double px = env->x - env->dock_x; + double py = env->y - env->dock_y; + double pz = env->z - env->dock_z; + + // Must be in front of docking point (positive y relative to dock) + if (py < 0.0 || py > env->los_extent) return 0; + + // LOS cone axis is along +y + double p_norm = sqrt(px*px + py*py + pz*pz); + if (p_norm < 1e-10) return 1; // At docking point + + // cos(angle) = dot(p, cone_axis) / |p| + // cone_axis = [0, 1, 0], so dot = py + double cos_angle = py / p_norm; + + return (cos_angle >= cos(env->los_half_angle)) ? 1 : 0; +} + +// ============================================================================ +// Observation Computation (10 floats: raw LVLH state + computed features) +// ============================================================================ + +static void compute_observations(OrbitalDock* env) { + // Raw LVLH state + env->observations[0] = (float)env->x; + env->observations[1] = (float)env->y; + env->observations[2] = (float)env->z; + env->observations[3] = (float)env->vx; + env->observations[4] = (float)env->vy; + env->observations[5] = (float)env->vz; + + // Computed features + double dx = env->x - env->dock_x; + double dy = env->y - env->dock_y; + double dz = env->z - env->dock_z; + double dist = sqrt(dx*dx + dy*dy + dz*dz); + double speed = sqrt(env->vx*env->vx + env->vy*env->vy + env->vz*env->vz); + double closing_vel = (dist > 1e-6) ? -(dx*env->vx + dy*env->vy + dz*env->vz) / dist : 0.0; + double time_remaining = 1.0 - (double)env->step_count / (double)env->max_steps; + + env->observations[6] = (float)dist; + env->observations[7] = (float)speed; + env->observations[8] = (float)closing_vel; + env->observations[9] = (float)time_remaining; +} + +// ============================================================================ +// Reset +// ============================================================================ + +void c_reset(OrbitalDock* env) { + env->step_count = 0; + + // Compute mean motion + env->n = sqrt(env->mu / (env->station_radius * env->station_radius * env->station_radius)); + + // V-bar approach initial conditions + // Position: [0, 800, 0] +/- [400, 300, 400] + double x_off = rndf_range(-1.0, 1.0) * env->init_x_range; + double y_off = rndf_range(-1.0, 1.0) * env->init_y_range; + double z_off = rndf_range(-1.0, 1.0) * env->init_z_range; + + env->x = env->init_x_center + x_off; + env->y = env->init_y_center + y_off; + env->z = env->init_z_center + z_off; + + // Velocity: random in ~[-2, 2] m/s per axis + env->vx = rndf_range(-2.0, 2.0); + env->vy = rndf_range(-2.0, 2.0); + env->vz = rndf_range(-2.0, 2.0); + + // Initialize fuel + env->fuel = env->fuel_budget; + + // Compute initial distance to dock for progress reward + double dx0 = env->x - env->dock_x; + double dy0 = env->y - env->dock_y; + double dz0 = env->z - env->dock_z; + env->prev_dist = sqrt(dx0*dx0 + dy0*dy0 + dz0*dz0); + + compute_observations(env); +} + +// ============================================================================ +// Step +// ============================================================================ + +void c_step(OrbitalDock* env) { + env->rewards[0] = 0.0f; + env->terminals[0] = 0; + env->step_count++; + + // Get thrust actions [-1, 1] -> force in Newtons + double act_x = fmax(-1.0, fmin(1.0, (double)env->actions[0])); + double act_y = fmax(-1.0, fmin(1.0, (double)env->actions[1])); + double act_z = fmax(-1.0, fmin(1.0, (double)env->actions[2])); + + double fx = act_x * env->max_thrust; + double fy = act_y * env->max_thrust; + double fz = act_z * env->max_thrust; + + // Fuel consumption + double f_mag = sqrt(fx*fx + fy*fy + fz*fz); + double dv_used = (f_mag / env->mass) * env->dt; + double ax = fx / env->mass; + double ay = fy / env->mass; + double az = fz / env->mass; + + if (env->fuel > 0 && f_mag > 0) { + double actual_dv = fmin(dv_used, env->fuel); + env->fuel -= actual_dv; + if (actual_dv < dv_used) { + double scale = actual_dv / dv_used; + ax *= scale; ay *= scale; az *= scale; + } + } else if (env->fuel <= 0) { + ax = 0; ay = 0; az = 0; + } + + // Integrate CW dynamics + cw_step_rk4(env, ax, ay, az); + + // Compute distance to docking point + double dx = env->x - env->dock_x; + double dy = env->y - env->dock_y; + double dz = env->z - env->dock_z; + double dist = sqrt(dx*dx + dy*dy + dz*dz); + double speed = sqrt(env->vx*env->vx + env->vy*env->vy + env->vz*env->vz); + + // Compute effective dock_speed (annealing) + env->global_step++; + double effective_dock_speed = env->dock_speed; + if (env->anneal_steps > 0) { + double frac = fmin(1.0, (double)env->global_step / (double)env->anneal_steps); + effective_dock_speed = env->dock_speed_start + frac * (env->dock_speed - env->dock_speed_start); + } + + // Check termination conditions + int is_in_los = in_los(env); + int docked = is_in_los && (dist < env->dock_dist) && (speed < effective_dock_speed); + int collision = (env->y < env->dock_y - 5.0); // y < 55m (5m past dock point) + int timeout = (env->step_count >= env->max_steps); + + // Compute reward + double reward = 0.0; + double progress = env->prev_dist - dist; // positive when approaching dock + + reward += 0.01 * progress; // distance progress + double prox_now = exp(-dist / 20.0); // proximity potential + double prox_prev = exp(-env->prev_dist / 20.0); + reward += 0.1 * (prox_now - prox_prev); // potential-based proximity + double brake_weight = exp(-dist / 30.0); + reward -= 0.005 * brake_weight * speed * speed; // proximity-weighted braking + reward -= 0.005 * (act_x*act_x + act_y*act_y + act_z*act_z); // control cost + reward -= 0.005; // time penalty + + // Terminal rewards (replace per-step reward) + if (docked) { + double speed_bonus = 0.5 * fmax(0.0, 1.0 - speed / effective_dock_speed); + reward = 0.5 + speed_bonus; // speed=0→1.0, speed=dock_speed→0.5 + env->terminals[0] = 1; + env->log.dock_success += 1.0f; + } else if (collision) { + reward = -1.0; + env->terminals[0] = 1; + env->log.crash_rate += 1.0f; + } else if (timeout) { + reward = -0.5; + env->terminals[0] = 1; + env->log.timeout_rate += 1.0f; + } + + env->rewards[0] = (float)reward; + env->prev_dist = dist; + + // Update log + env->log.episode_return += (float)reward; + if (env->terminals[0]) { + env->log.episode_length += (float)env->step_count; + env->log.fuel_used += (float)(env->fuel_budget - env->fuel); + env->log.final_distance += (float)dist; + env->log.final_rel_speed += (float)speed; + env->log.n += 1.0f; + + c_reset(env); + } + + compute_observations(env); +} + +// ============================================================================ +// Render (implemented in render.h) +// ============================================================================ + +Client* make_client(OrbitalDock *env); +void close_client(Client *client); +void render_orbital_dock(OrbitalDock *env, Client *client); + +void c_render(OrbitalDock* env) { + if (env->client == NULL) { + env->client = make_client(env); + if (env->client == NULL) return; + } + render_orbital_dock(env, env->client); +} + +void c_close(OrbitalDock* env) { + if (env->client != NULL) { + close_client(env->client); + env->client = NULL; + } +} + +#endif // ORBITAL_DOCK_H diff --git a/pufferlib/ocean/orbital_dock/orbital_dock.py b/pufferlib/ocean/orbital_dock/orbital_dock.py new file mode 100644 index 0000000000..cb938d4d50 --- /dev/null +++ b/pufferlib/ocean/orbital_dock/orbital_dock.py @@ -0,0 +1,178 @@ +'''Orbital rendezvous and docking in LVLH frame. + +Clohessy-Wiltshire relative motion dynamics with RK4 integration. +3-axis continuous thrust control, LOS cone constraint, speed-gated docking. +''' + +import gymnasium +import numpy as np + +import pufferlib +from pufferlib.ocean.orbital_dock import binding + + +class OrbitalDock(pufferlib.PufferEnv): + def __init__( + self, + num_envs=1, + render_mode=None, + report_interval=128, + buf=None, + seed=0, + # Physics + mu=3.986e14, + station_radius=42164000.0, + dt=1.0, + max_thrust=10.0, + mass=500.0, + fuel_budget=100.0, + max_steps=2500, + # Docking point ([0, 60, 0] in LVLH) + dock_x=0.0, + dock_y=60.0, + dock_z=0.0, + dock_dist=10.0, + dock_speed=2.0, + dock_speed_start=10.0, + anneal_steps=200000, + # LOS cone + los_angle=60.0, + los_extent=800.0, + # Initial conditions + init_x_center=0.0, + init_y_center=800.0, + init_z_center=0.0, + init_x_range=400.0, + init_y_range=300.0, + init_z_range=400.0, + ): + # 10-dimensional observation: raw LVLH state + computed features + # [x, y, z, vx, vy, vz, dist, speed, closing_vel, time_remaining] + self.single_observation_space = gymnasium.spaces.Box( + low=-2000.0, high=2000.0, shape=(10,), dtype=np.float32 + ) + + # Continuous action space: normalized thrust fractions in LVLH frame + self.single_action_space = gymnasium.spaces.Box( + low=-1.0, high=1.0, shape=(3,), dtype=np.float32 + ) + + self.render_mode = None if render_mode in (None, 'None') else render_mode + self.num_agents = num_envs + self.report_interval = report_interval + self.tick = 0 + + super().__init__(buf) + + self.c_envs = binding.vec_init( + self.observations, + self.actions, + self.rewards, + self.terminals, + self.truncations, + num_envs, + seed, + mu=mu, + station_radius=station_radius, + dt=dt, + max_thrust=max_thrust, + mass=mass, + fuel_budget=fuel_budget, + max_steps=max_steps, + dock_x=dock_x, + dock_y=dock_y, + dock_z=dock_z, + dock_dist=dock_dist, + dock_speed=dock_speed, + dock_speed_start=dock_speed_start, + anneal_steps=anneal_steps, + los_angle=los_angle, + los_extent=los_extent, + init_x_center=init_x_center, + init_y_center=init_y_center, + init_z_center=init_z_center, + init_x_range=init_x_range, + init_y_range=init_y_range, + init_z_range=init_z_range, + ) + + def reset(self, seed=0): + self.tick = 0 + binding.vec_reset(self.c_envs, seed) + return self.observations, [] + + def step(self, actions): + self.actions[:] = actions + self.tick += 1 + binding.vec_step(self.c_envs) + + if self.render_mode == 'human': + self.render() + + info = [] + if self.tick % self.report_interval == 0: + log_data = binding.vec_log(self.c_envs) + if log_data: + info.append(log_data) + + return (self.observations, self.rewards, + self.terminals, self.truncations, info) + + def render(self): + binding.vec_render(self.c_envs, 0) + + def close(self): + binding.vec_close(self.c_envs) + + +def test_performance(timeout=10, atn_cache=1024, num_envs=4096): + '''Test environment performance.''' + import time + env = OrbitalDock(num_envs=num_envs) + env.reset() + tick = 0 + actions = np.random.uniform(-1.0, 1.0, (atn_cache, num_envs, 3)).astype(np.float32) + start = time.time() + while time.time() - start < timeout: + env.step(actions[tick % atn_cache]) + tick += 1 + elapsed = time.time() - start + sps = int(num_envs * tick / elapsed) + print(f'OrbitalDock SPS: {sps:,}') + env.close() + return sps + + +def test_basic(): + '''Basic functionality test.''' + print('Testing OrbitalDock (CW dynamics)...') + env = OrbitalDock(num_envs=2) + obs, _ = env.reset(seed=42) + print(f' Obs shape: {obs.shape}') + print(f' Obs range: [{obs.min():.1f}, {obs.max():.1f}]') + print(f' Obs[0]: x={obs[0,0]:.1f} y={obs[0,1]:.1f} z={obs[0,2]:.1f} ' + f'vx={obs[0,3]:.3f} vy={obs[0,4]:.3f} vz={obs[0,5]:.3f}') + + # Coast (zero thrust) + zero = np.zeros((2, 3), dtype=np.float32) + for i in range(10): + obs, r, t, tr, info = env.step(zero) + print(f' After 10 steps (zero thrust):') + print(f' Rewards: {r}') + print(f' Obs[0]: x={obs[0,0]:.1f} y={obs[0,1]:.1f} z={obs[0,2]:.1f}') + + # Random thrust + for i in range(100): + actions = np.random.uniform(-1.0, 1.0, (2, 3)).astype(np.float32) + obs, r, t, tr, info = env.step(actions) + print(f' After 100 random steps:') + print(f' Obs range: [{obs.min():.1f}, {obs.max():.1f}]') + + env.close() + print(' Test passed!') + + +if __name__ == '__main__': + test_basic() + print() + test_performance() diff --git a/pufferlib/ocean/orbital_dock/render.h b/pufferlib/ocean/orbital_dock/render.h new file mode 100644 index 0000000000..7049694fbd --- /dev/null +++ b/pufferlib/ocean/orbital_dock/render.h @@ -0,0 +1,655 @@ +#ifndef ORBITAL_DOCK_RENDER_H +#define ORBITAL_DOCK_RENDER_H + +#include "raymath.h" +#include + +// ============================================================================ +// Render Client Struct +// ============================================================================ + +#define RENDER_WIDTH 1080 +#define RENDER_HEIGHT 720 +#define TRAIL_LENGTH 256 +#define NUM_STARS 300 +#define EARTH_RADIUS_M 6371000.0 +#define GEO_RADIUS_M 42164000.0 +#define VISUAL_ORBIT_TIME_SCALE 30.0 +#define VISUAL_PATH_DIRECTION -1.0 // Orbit path direction around Earth in render +#define VISUAL_VBAR_DIRECTION 1.0 // Keep LVLH +V-bar orientation as configured +#define VISUAL_SPIN_TIME_SCALE 2.0 +#define VISUAL_EARTH_SPIN_MULT 60.0 +#define LVLH_OVERLAY_SCALE_M 40.0 +#define LVLH_VIS_MAX_M 900.0 + +typedef struct { + double x, y, z; +} Vec3d; + +typedef struct { + Vec3d pos[TRAIL_LENGTH]; + int index; + int count; +} Trail; + +struct Client { + Camera3D camera; + float width; + float height; + float camera_distance; + float camera_azimuth; + float camera_elevation; + float camera_min_distance; + float camera_max_distance; + bool is_dragging; + Vector2 last_mouse_pos; + + Trail trail; + float far_scale; // Far background scale for docking mode + float star_radius; + int last_step; + + // Render-only analytical orbital state + double mean_motion; + double theta; + double earth_spin_angle; + Vec3d rs; + Vec3d rhat; + Vec3d vhat; + Vec3d hhat; + + Vec3d stars[NUM_STARS]; + Model earth_model; + bool earth_loaded; + Vector3 earth_center; + float earth_model_radius; +}; + +// Additional colors +static const Color PUFF_GREEN = {0, 187, 0, 255}; +static const Color PUFF_MAGENTA = {187, 0, 187, 255}; + +static inline Vec3d vec3d(double x, double y, double z) { + Vec3d v = {x, y, z}; + return v; +} + +static inline Vec3d vec3d_add(Vec3d a, Vec3d b) { + return vec3d(a.x + b.x, a.y + b.y, a.z + b.z); +} + +static inline Vec3d vec3d_scale(Vec3d a, double s) { + return vec3d(a.x * s, a.y * s, a.z * s); +} + +static inline double vec3d_dot(Vec3d a, Vec3d b) { + return a.x * b.x + a.y * b.y + a.z * b.z; +} + +static inline Vec3d vec3d_cross(Vec3d a, Vec3d b) { + return vec3d( + a.y * b.z - a.z * b.y, + a.z * b.x - a.x * b.z, + a.x * b.y - a.y * b.x + ); +} + +static inline double vec3d_norm(Vec3d a) { + return sqrt(vec3d_dot(a, a)); +} + +static inline Vec3d vec3d_normalize(Vec3d a, Vec3d fallback) { + double n = vec3d_norm(a); + if (n < 1e-12) return fallback; + return vec3d_scale(a, 1.0 / n); +} + +static inline float clampf_render(float v, float lo, float hi) { + return v < lo ? lo : (v > hi ? hi : v); +} + +static inline unsigned int render_xorshift32(unsigned int* state) { + unsigned int x = *state; + x ^= x << 13; + x ^= x >> 17; + x ^= x << 5; + *state = x; + return x; +} + +static inline double render_rand_signed(unsigned int* state) { + return 2.0 * ((double)render_xorshift32(state) / (double)0xFFFFFFFF) - 1.0; +} + +static void set_default_camera(Client *client) { + client->camera_distance = 170.0f; + client->camera_azimuth = -0.72f; + client->camera_elevation = 0.26f; + client->camera_min_distance = 12.0f; + client->camera_max_distance = 360.0f; +} + +static void update_camera_position(Client *c, Vector3 target) { + float r = c->camera_distance; + float az = c->camera_azimuth; + float el = c->camera_elevation; + + float x = r * cosf(el) * cosf(az); + float y = r * cosf(el) * sinf(az); + float z = r * sinf(el); + + c->camera.position = (Vector3){target.x + x, target.y + y, target.z + z}; + c->camera.target = target; +} + +static void handle_camera_controls(Client *client) { + Vector2 mouse_pos = GetMousePosition(); + + if (IsMouseButtonPressed(MOUSE_BUTTON_LEFT)) { + client->is_dragging = true; + client->last_mouse_pos = mouse_pos; + } + + if (IsMouseButtonReleased(MOUSE_BUTTON_LEFT)) { + client->is_dragging = false; + } + + if (client->is_dragging && IsMouseButtonDown(MOUSE_BUTTON_LEFT)) { + Vector2 mouse_delta = { + mouse_pos.x - client->last_mouse_pos.x, + mouse_pos.y - client->last_mouse_pos.y + }; + + float sensitivity = 0.005f; + client->camera_azimuth -= mouse_delta.x * sensitivity; + client->camera_elevation += mouse_delta.y * sensitivity; + client->camera_elevation = clampf_render(client->camera_elevation, + -M_PI / 2.0f + 0.1f, + M_PI / 2.0f - 0.1f); + client->last_mouse_pos = mouse_pos; + } + + float wheel = GetMouseWheelMove(); + if (wheel != 0) { + client->camera_distance -= wheel * 2.0f; + client->camera_distance = clampf_render(client->camera_distance, + client->camera_min_distance, + client->camera_max_distance); + } +} + +static void update_orbital_basis(Client *client, OrbitalDock *env) { + double R = env->station_radius; + client->mean_motion = sqrt(env->mu / (R * R * R)); + + // Use global_step so visual orbit is continuous across episode resets. + double t = (double)env->global_step * env->dt * VISUAL_ORBIT_TIME_SCALE; + double path_dir = VISUAL_PATH_DIRECTION; + client->theta = fmod(path_dir * t * client->mean_motion, 2.0 * M_PI); + if (client->theta < 0.0) client->theta += 2.0 * M_PI; + + double c = cos(client->theta); + double s = sin(client->theta); + // Orbit in X-Y plane (normal +Z) to match renderer's Z-up convention. + client->rs = vec3d(R * c, R * s, 0.0); + client->rhat = vec3d_normalize(client->rs, vec3d(1.0, 0.0, 0.0)); + client->hhat = vec3d(0.0, 0.0, 1.0); + client->vhat = vec3d_normalize(vec3d_cross(client->hhat, client->rhat), + vec3d(0.0, 1.0, 0.0)); + if (VISUAL_VBAR_DIRECTION < 0.0) { + client->vhat = vec3d_scale(client->vhat, -1.0); + } +} + +static Vector3 eci_to_render(Vec3d p_eci, float meters_per_unit) { + return (Vector3){ + (float)(p_eci.x / meters_per_unit), + (float)(p_eci.y / meters_per_unit), + (float)(p_eci.z / meters_per_unit) + }; +} + +Client* make_client(OrbitalDock *env) { + Client *client = (Client*)calloc(1, sizeof(Client)); + + client->width = RENDER_WIDTH; + client->height = RENDER_HEIGHT; + + SetConfigFlags(FLAG_MSAA_4X_HINT); + InitWindow(RENDER_WIDTH, RENDER_HEIGHT, "PufferLib Orbital Dock"); + SetTargetFPS(60); + + if (!IsWindowReady()) { + free(client); + return NULL; + } + + client->is_dragging = false; + client->last_mouse_pos = (Vector2){0, 0}; + + client->camera.up = (Vector3){0, 0, 1}; + client->camera.fovy = 45.0f; + client->camera.projection = CAMERA_PERSPECTIVE; + + set_default_camera(client); + update_camera_position(client, (Vector3){0, 0, 0}); + + client->far_scale = (float)fmax(1.0, env->station_radius / 42.0); + client->star_radius = 520.0f; + + client->trail.index = 0; + client->trail.count = 0; + client->last_step = -1; + for (int i = 0; i < TRAIL_LENGTH; i++) { + client->trail.pos[i] = vec3d(0, 0, 0); + } + + update_orbital_basis(client, env); + + unsigned int seed = 0xC0FFEE01u; + for (int i = 0; i < NUM_STARS; i++) { + double x, y, z, r2; + do { + x = render_rand_signed(&seed); + y = render_rand_signed(&seed); + z = render_rand_signed(&seed); + r2 = x*x + y*y + z*z; + } while (r2 > 1.0 || r2 < 0.001); + double inv_r = 1.0 / sqrt(r2); + client->stars[i] = vec3d(x * inv_r, y * inv_r, z * inv_r); + } + + client->earth_model = LoadModel("resources/orbital_dock/ps1_style_low_poly_earth.glb"); + client->earth_loaded = client->earth_model.meshCount > 0; + client->earth_center = (Vector3){0.0f, 0.0f, 0.0f}; + client->earth_model_radius = 1.0f; + if (client->earth_loaded && client->earth_model.meshCount > 0) { + Vector3 bb_min = (Vector3){FLT_MAX, FLT_MAX, FLT_MAX}; + Vector3 bb_max = (Vector3){-FLT_MAX, -FLT_MAX, -FLT_MAX}; + for (int i = 0; i < client->earth_model.meshCount; i++) { + BoundingBox bb = GetMeshBoundingBox(client->earth_model.meshes[i]); + bb_min.x = fminf(bb_min.x, bb.min.x); + bb_min.y = fminf(bb_min.y, bb.min.y); + bb_min.z = fminf(bb_min.z, bb.min.z); + bb_max.x = fmaxf(bb_max.x, bb.max.x); + bb_max.y = fmaxf(bb_max.y, bb.max.y); + bb_max.z = fmaxf(bb_max.z, bb.max.z); + } + client->earth_center = (Vector3){ + 0.5f * (bb_min.x + bb_max.x), + 0.5f * (bb_min.y + bb_max.y), + 0.5f * (bb_min.z + bb_max.z) + }; + float hx = 0.5f * (bb_max.x - bb_min.x); + float hy = 0.5f * (bb_max.y - bb_min.y); + float hz = 0.5f * (bb_max.z - bb_min.z); + float r = sqrtf(hx*hx + hy*hy + hz*hz); + client->earth_model_radius = fmaxf(1e-4f, r); + Matrix center = MatrixTranslate( + -client->earth_center.x, + -client->earth_center.y, + -client->earth_center.z + ); + Matrix align = MatrixRotateX(PI / 2.0f); + client->earth_model.transform = MatrixMultiply(align, center); + } + client->earth_spin_angle = 0.0; + + return client; +} + +void close_client(Client *client) { + if (client) { + if (client->earth_loaded) { + UnloadModel(client->earth_model); + } + CloseWindow(); + free(client); + } +} + +static void draw_earth_model(Client *client, Vector3 render_pos, float model_scale) { + if (!client->earth_loaded) { + DrawSphere(render_pos, model_scale, (Color){30, 90, 180, 255}); + return; + } + + float draw_scale = model_scale / client->earth_model_radius; + DrawModelEx(client->earth_model, + render_pos, + (Vector3){0.0f, 0.0f, 1.0f}, + RAD2DEG * (float)client->earth_spin_angle, + (Vector3){draw_scale, draw_scale, draw_scale}, + WHITE); +} + +static void draw_orbit_ring_scaled(OrbitalDock *env, float eci_meters_per_unit, Color color) { + const int n_seg = 192; + for (int i = 0; i < n_seg; i++) { + double t0 = 2.0 * M_PI * i / n_seg; + double t1 = 2.0 * M_PI * (i + 1) / n_seg; + Vec3d p0 = vec3d(env->station_radius * cos(t0), env->station_radius * sin(t0), 0.0); + Vec3d p1 = vec3d(env->station_radius * cos(t1), env->station_radius * sin(t1), 0.0); + DrawLine3D(eci_to_render(p0, eci_meters_per_unit), + eci_to_render(p1, eci_meters_per_unit), color); + } +} + +static void draw_station_trail_scaled(Client *client, OrbitalDock *env, float eci_meters_per_unit, Color base_color) { + const int n_pts = 220; + double dtheta = client->mean_motion * env->dt * 3.0; + for (int i = 0; i < n_pts - 1; i++) { + double t0 = client->theta - dtheta * i; + double t1 = client->theta - dtheta * (i + 1); + Vec3d p0 = vec3d(env->station_radius * cos(t0), env->station_radius * sin(t0), 0.0); + Vec3d p1 = vec3d(env->station_radius * cos(t1), env->station_radius * sin(t1), 0.0); + float age = (float)i / (float)n_pts; + float fade = 1.0f - age; + unsigned char r = (unsigned char)(base_color.r * (0.3f + 0.7f * fade)); + unsigned char g = (unsigned char)(base_color.g * (0.3f + 0.7f * fade)); + unsigned char b = (unsigned char)(base_color.b * (0.3f + 0.7f * fade)); + Color c = {r, g, b, (unsigned char)(230.0f * fade)}; + DrawLine3D(eci_to_render(p0, eci_meters_per_unit), + eci_to_render(p1, eci_meters_per_unit), c); + } +} + +// Render LVLH offsets as a local overlay around the station: +// station position uses far-scene scale, relative offsets use local overlay scale. +static Vector3 lvlh_to_orbit_overlay_render(Client *client, + double x, double y, double z, + float station_m_per_unit, + float overlay_m_per_unit) { + Vec3d delta_eci = vec3d_add( + vec3d_scale(client->rhat, x), + vec3d_add(vec3d_scale(client->vhat, y), vec3d_scale(client->hhat, z))); + Vector3 station = eci_to_render(client->rs, station_m_per_unit); + Vector3 delta = eci_to_render(delta_eci, overlay_m_per_unit); + return (Vector3){station.x + delta.x, station.y + delta.y, station.z + delta.z}; +} + +// Convert a pure LVLH direction/offset (meters) into render delta for overlay scale. +static Vector3 lvlh_overlay_delta_render(Client *client, + double x, double y, double z, + float overlay_m_per_unit) { + Vec3d delta_eci = vec3d_add( + vec3d_scale(client->rhat, x), + vec3d_add(vec3d_scale(client->vhat, y), vec3d_scale(client->hhat, z))); + return eci_to_render(delta_eci, overlay_m_per_unit); +} + +static Vector3 lvlh_to_orbit_render_clamped(Client *client, + double x, double y, double z, + float station_m_per_unit, + float overlay_m_per_unit, + double max_range_m, + bool *clipped_out) { + double r = sqrt(x*x + y*y + z*z); + double k = 1.0; + bool clipped = false; + if (r > max_range_m && r > 1e-6) { + k = max_range_m / r; + clipped = true; + } + if (clipped_out) *clipped_out = clipped; + return lvlh_to_orbit_overlay_render(client, x * k, y * k, z * k, + station_m_per_unit, overlay_m_per_unit); +} + +static void draw_stars(Client *client) { + for (int i = 0; i < NUM_STARS; i++) { + Vector3 p = (Vector3){ + (float)(client->stars[i].x * client->star_radius), + (float)(client->stars[i].y * client->star_radius), + (float)(client->stars[i].z * client->star_radius) + }; + unsigned char b = (unsigned char)(180 + (i * 73) % 76); + DrawCubeV(p, (Vector3){1.00f, 1.00f, 1.00f}, + (Color){b, b, b, 255}); + } +} + +void render_orbital_dock(OrbitalDock *env, Client *client) { + if (WindowShouldClose() || IsKeyDown(KEY_ESCAPE)) { + close_client(client); + exit(0); + } + + handle_camera_controls(client); + WaitTime(0.033); // ~30fps max + + update_orbital_basis(client, env); + const double earth_spin_rate = 7.2921159e-5; // rad/s + client->earth_spin_angle = fmod( + (double)env->global_step * env->dt * earth_spin_rate + * VISUAL_SPIN_TIME_SCALE * VISUAL_EARTH_SPIN_MULT, + 2.0 * M_PI + ); + + // CW state is already in LVLH relative to station at origin + double dist = sqrt( + (env->x - env->dock_x) * (env->x - env->dock_x) + + (env->y - env->dock_y) * (env->y - env->dock_y) + + (env->z - env->dock_z) * (env->z - env->dock_z)); + double rel_speed = sqrt(env->vx*env->vx + env->vy*env->vy + env->vz*env->vz); + + // Update local LVLH trail for docking mode + if (env->step_count < client->last_step || env->step_count == 0) { + client->trail.count = 0; + client->trail.index = 0; + } + client->last_step = env->step_count; + + if (env->step_count % 5 == 0) { + client->trail.pos[client->trail.index] = vec3d(env->x, env->y, env->z); + client->trail.index = (client->trail.index + 1) % TRAIL_LENGTH; + if (client->trail.count < TRAIL_LENGTH) { + client->trail.count++; + } + } + + float act_x = env->actions[0]; // R-bar + float act_y = env->actions[1]; // V-bar + float act_z = env->actions[2]; // H-bar + + // Station position in the far-orbit visual scale (used by docking mode). + Vector3 station_far = eci_to_render(client->rs, client->far_scale); + float lvlh_scale_overlay = LVLH_OVERLAY_SCALE_M; + + BeginDrawing(); + ClearBackground(BLACK); + + update_camera_position(client, station_far); + + BeginMode3D(client->camera); + + // DOCKING view: Earth-centered, LVLH frame moves along station orbit. + float orbit_radius = (float)(env->station_radius / client->far_scale); + float earth_radius = orbit_radius * (float)(EARTH_RADIUS_M / GEO_RADIUS_M); + + draw_stars(client); + draw_earth_model(client, (Vector3){0, 0, 0}, earth_radius); + draw_orbit_ring_scaled(env, client->far_scale, (Color){85, 115, 165, 95}); + draw_station_trail_scaled(client, env, client->far_scale, (Color){0, 230, 255, 255}); + + // Station marker (match old LVLH cue) + DrawCube(station_far, 0.45f, 0.45f, 0.45f, PUFF_CYAN); + DrawCubeWires(station_far, 0.58f, 0.58f, 0.58f, PUFF_WHITE); + + // LVLH triad at station in ECI-aligned render coordinates. + { + float axis_len = 6.0f; + double axis_m = axis_len * client->far_scale; + Vector3 r_axis = eci_to_render(vec3d_add(client->rs, vec3d_scale(client->rhat, axis_m)), client->far_scale); + Vector3 v_axis = eci_to_render(vec3d_add(client->rs, vec3d_scale(client->vhat, axis_m)), client->far_scale); + Vector3 h_axis = eci_to_render(vec3d_add(client->rs, vec3d_scale(client->hhat, axis_m)), client->far_scale); + DrawLine3D(station_far, v_axis, PUFF_GREEN); + DrawLine3D(station_far, h_axis, PUFF_CYAN); + DrawLine3D(station_far, r_axis, PUFF_YELLOW); + DrawSphere(v_axis, 0.18f, PUFF_GREEN); + DrawSphere(h_axis, 0.18f, PUFF_CYAN); + DrawSphere(r_axis, 0.18f, PUFF_YELLOW); + } + + Vector3 dock_pos = lvlh_to_orbit_overlay_render(client, env->dock_x, env->dock_y, env->dock_z, + client->far_scale, lvlh_scale_overlay); + DrawSphere(dock_pos, 0.24f, PUFF_GREEN); + + Vector3 chaser_pos = lvlh_to_orbit_render_clamped(client, env->x, env->y, env->z, + client->far_scale, lvlh_scale_overlay, + LVLH_VIS_MAX_M, + NULL); + DrawSphere(chaser_pos, 0.32f, PUFF_WHITE); + + // Purple heading/velocity cue: always draw from visible chaser marker. + if (rel_speed > 0.001) { + double vnorm = 1.0 / rel_speed; + double hx = env->vx * vnorm; + double hy = env->vy * vnorm; + double hz = env->vz * vnorm; + Vector3 hdelta = lvlh_overlay_delta_render(client, + hx * 160.0, hy * 160.0, hz * 160.0, lvlh_scale_overlay); + Vector3 heading_end = { + chaser_pos.x + hdelta.x, + chaser_pos.y + hdelta.y, + chaser_pos.z + hdelta.z + }; + DrawLine3D(chaser_pos, heading_end, PUFF_MAGENTA); + DrawSphere(heading_end, 0.12f, PUFF_MAGENTA); + } + + float act_mag = sqrtf(act_x*act_x + act_y*act_y + act_z*act_z); + // Orange thrust cue: always draw from visible chaser marker. + if (act_mag > 0.02f) { + Vector3 cdelta = lvlh_overlay_delta_render(client, + act_x * 160.0, act_y * 160.0, act_z * 160.0, lvlh_scale_overlay); + Vector3 cmd_end = { + chaser_pos.x + cdelta.x, + chaser_pos.y + cdelta.y, + chaser_pos.z + cdelta.z + }; + DrawLine3D(chaser_pos, cmd_end, PUFF_ORANGE); + DrawSphere(cmd_end, 0.20f, PUFF_ORANGE); + } + + float dock_zone_radius = fmaxf(0.12f, (float)(env->dock_dist / lvlh_scale_overlay)); + DrawSphereWires(dock_pos, dock_zone_radius, 12, 12, (Color){0, 255, 0, 100}); + + // LOS cone (wireframe from dock point along +V-bar in LVLH) + { + double ha = env->los_half_angle; + double extent = env->los_extent; + int n_seg = 32; + int n_rings = 5; + int n_edges = 12; + Color cone_color = {0, 220, 80, 220}; + + for (int ri = 1; ri <= n_rings; ri++) { + double d = extent * ri / n_rings; + double r = d * tan(ha); + for (int i = 0; i < n_seg; i++) { + double a0 = 2.0 * M_PI * i / n_seg; + double a1 = 2.0 * M_PI * (i + 1) / n_seg; + Vector3 p0 = lvlh_to_orbit_overlay_render(client, + env->dock_x + r * cos(a0), + env->dock_y + d, + env->dock_z + r * sin(a0), + client->far_scale, lvlh_scale_overlay); + Vector3 p1 = lvlh_to_orbit_overlay_render(client, + env->dock_x + r * cos(a1), + env->dock_y + d, + env->dock_z + r * sin(a1), + client->far_scale, lvlh_scale_overlay); + DrawLine3D(p0, p1, cone_color); + if ((ri == n_rings) && (i % 8 == 0)) { + DrawSphere(p0, 0.06f, (Color){0, 230, 90, 255}); + } + } + } + + double r_end = extent * tan(ha); + for (int i = 0; i < n_edges; i++) { + double a = 2.0 * M_PI * i / n_edges; + Vector3 rim = lvlh_to_orbit_overlay_render(client, + env->dock_x + r_end * cos(a), + env->dock_y + extent, + env->dock_z + r_end * sin(a), + client->far_scale, lvlh_scale_overlay); + DrawLine3D(dock_pos, rim, cone_color); + } + } + + // Chaser trail in LVLH, projected through current LVLH->ECI transform. + if (client->trail.count > 2) { + for (int j = 0; j < client->trail.count - 1; j++) { + int idx0 = (client->trail.index - j - 1 + TRAIL_LENGTH) % TRAIL_LENGTH; + int idx1 = (client->trail.index - j - 2 + TRAIL_LENGTH) % TRAIL_LENGTH; + + float age_factor = (float)j / (float)client->trail.count; + unsigned char brightness = (unsigned char)(187 * (1.0f - age_factor * 0.8f)); + Color trail_color = {0, brightness, brightness, 255}; + + Vec3d p0_lvlh = client->trail.pos[idx0]; + Vec3d p1_lvlh = client->trail.pos[idx1]; + Vector3 p0 = lvlh_to_orbit_render_clamped(client, p0_lvlh.x, p0_lvlh.y, p0_lvlh.z, + client->far_scale, lvlh_scale_overlay, + LVLH_VIS_MAX_M, NULL); + Vector3 p1 = lvlh_to_orbit_render_clamped(client, p1_lvlh.x, p1_lvlh.y, p1_lvlh.z, + client->far_scale, lvlh_scale_overlay, + LVLH_VIS_MAX_M, NULL); + DrawLine3D(p0, p1, trail_color); + } + } + + EndMode3D(); + + // HUD + int y = 20; + int dy = 22; + DrawText(TextFormat("Distance to dock: %.1f m", dist), 20, y, 20, PUFF_WHITE); y += dy; + DrawText(TextFormat("Rel Speed: %.2f m/s", rel_speed), 20, y, 20, PUFF_WHITE); y += dy; + DrawText(TextFormat("Fuel: %.1f%%", 100.0 * env->fuel / env->fuel_budget), 20, y, 20, PUFF_WHITE); y += dy; + DrawText(TextFormat("Step: %d / %d", env->step_count, env->max_steps), 20, y, 20, PUFF_WHITE); y += dy; + + y += 8; + DrawText(TextFormat("Altitude: %.0f km | Phase: %.1f deg", + env->station_radius / 1000.0 - 6371.0, + client->theta * 180.0 / M_PI), + 20, y, 18, (Color){190, 190, 210, 255}); y += dy; + + y += 4; + DrawText(TextFormat("Pos: [%.0f, %.0f, %.0f] m", env->x, env->y, env->z), + 20, y, 18, (Color){150, 150, 150, 255}); y += dy - 4; + DrawText(TextFormat("Vel: [%.2f, %.2f, %.2f] m/s", env->vx, env->vy, env->vz), + 20, y, 18, (Color){150, 150, 150, 255}); y += dy; + + DrawText("Thrust Cmd:", 20, y, 20, PUFF_ORANGE); y += dy; + DrawText(TextFormat(" R-bar: %+.0f%%", act_x * 100.0f), 20, y, 18, + act_x > 0.05 ? PUFF_GREEN : (act_x < -0.05 ? PUFF_RED : PUFF_WHITE)); y += dy - 4; + DrawText(TextFormat(" V-bar: %+.0f%%", act_y * 100.0f), 20, y, 18, + act_y > 0.05 ? PUFF_GREEN : (act_y < -0.05 ? PUFF_RED : PUFF_WHITE)); y += dy - 4; + DrawText(TextFormat(" H-bar: %+.0f%%", act_z * 100.0f), 20, y, 18, + act_z > 0.05 ? PUFF_GREEN : (act_z < -0.05 ? PUFF_RED : PUFF_WHITE)); y += dy; + + int rx = RENDER_WIDTH - 230; + y = RENDER_HEIGHT - 120; + DrawText("LVLH Axes:", rx, y, 18, PUFF_WHITE); y += 20; + DrawText(" V-bar (prograde)", rx, y, 16, PUFF_GREEN); y += 18; + DrawText(" H-bar (normal)", rx, y, 16, PUFF_CYAN); y += 18; + DrawText(" R-bar (radial)", rx, y, 16, PUFF_YELLOW); + + DrawText("Left click + drag: Rotate camera", 20, RENDER_HEIGHT - 56, 16, + (Color){150, 150, 150, 255}); + DrawText("Mouse wheel: Zoom | ESC: Exit", + 20, RENDER_HEIGHT - 34, 16, (Color){150, 150, 150, 255}); + + if (dist < env->dock_dist && rel_speed < env->dock_speed) { + DrawText("DOCKED!", RENDER_WIDTH/2 - 60, RENDER_HEIGHT/2 - 20, 40, PUFF_GREEN); + } else if (dist < env->dock_dist) { + DrawText("TOO FAST!", RENDER_WIDTH/2 - 70, RENDER_HEIGHT/2 - 20, 40, PUFF_RED); + } + + EndDrawing(); +} + +#endif // ORBITAL_DOCK_RENDER_H diff --git a/pufferlib/ocean/torch.py b/pufferlib/ocean/torch.py index c7663c5f5b..531f05f23c 100644 --- a/pufferlib/ocean/torch.py +++ b/pufferlib/ocean/torch.py @@ -826,6 +826,99 @@ def decode_actions(self, flat_hidden): value = self.value_fn(flat_hidden) return action, value +class RunningNorm(nn.Module): + '''Running mean/std observation normalization (Chen et al. 2023). + Tracks statistics during training, normalizes to zero mean / unit variance. + ''' + def __init__(self, shape, clip=10.0): + super().__init__() + self.register_buffer('mean', torch.zeros(shape)) + self.register_buffer('var', torch.ones(shape)) + self.register_buffer('count', torch.tensor(1e-4)) + self.clip = clip + + def update(self, x): + batch_mean = x.mean(dim=0) + batch_var = x.var(dim=0, unbiased=False) + batch_count = x.shape[0] + delta = batch_mean - self.mean + total = self.count + batch_count + self.mean = self.mean + delta * batch_count / total + m_a = self.var * self.count + m_b = batch_var * batch_count + m2 = m_a + m_b + delta**2 * self.count * batch_count / total + self.var = m2 / total + self.count = total + + def forward(self, x): + if self.training: + self.update(x.detach()) + return torch.clamp((x - self.mean) / torch.sqrt(self.var + 1e-8), + -self.clip, self.clip) + + +class OrbitalDock(pufferlib.models.Default): + '''Separate actor/critic for orbital docking + + Actor: encoder -> action mean + Critic: independent encoder -> value (no shared weights with actor) + Running observation normalization (`RunningNorm`) applied to both paths. + Obs: 10 values [x, y, z, vx, vy, vz, dist, speed, closing_vel, time_remaining]. + ''' + def __init__(self, env, hidden_size=128, **kwargs): + super().__init__(env, hidden_size=hidden_size, **kwargs) + + num_obs = int(np.prod(env.single_observation_space.shape)) + + # Fix actor encoder to 2 layers (Default only creates 1 layer) + self.encoder = nn.Sequential( + pufferlib.pytorch.layer_init(nn.Linear(num_obs, hidden_size)), + nn.GELU(), + pufferlib.pytorch.layer_init(nn.Linear(hidden_size, hidden_size)), + nn.GELU(), + ) + + # Running observation normalization (SB3 VecNormalize: norm_obs=True) + self.obs_norm = RunningNorm(num_obs, clip=10.0) + + # Separate critic network (independent of actor encoder) + self.critic_encoder = nn.Sequential( + pufferlib.pytorch.layer_init(nn.Linear(num_obs, hidden_size)), + nn.GELU(), + pufferlib.pytorch.layer_init(nn.Linear(hidden_size, hidden_size)), + nn.GELU(), + ) + self.critic_value = pufferlib.pytorch.layer_init( + nn.Linear(hidden_size, 1), std=1) + + if self.is_continuous: + nn.init.constant_(self.decoder_logstd, -1.0) + + def encode_observations(self, observations, state=None): + batch_size = observations.shape[0] + obs_flat = observations.view(batch_size, -1).float() + # Normalize observations (running mean/std) + obs_normed = self.obs_norm(obs_flat) + self._last_obs_normed = obs_normed + # Actor encoder + return self.encoder(obs_normed) + + def decode_actions(self, hidden): + if self.is_continuous: + mean = self.decoder_mean(hidden) + logstd = self.decoder_logstd.expand_as(mean) + std = torch.exp(logstd) + logits = torch.distributions.Normal(mean, std) + else: + logits = self.decoder(hidden) + + # Critic: own encoder on normalized observations (no shared weights) + critic_hidden = self.critic_encoder(self._last_obs_normed) + values = self.critic_value(critic_hidden) + + return logits, values + + class Drone(nn.Module): ''' Drone policy. Flattens obs and applies a linear layer. ''' diff --git a/pufferlib/resources/orbital_dock/ps1_style_low_poly_earth.glb b/pufferlib/resources/orbital_dock/ps1_style_low_poly_earth.glb new file mode 100644 index 0000000000..0984bae45a Binary files /dev/null and b/pufferlib/resources/orbital_dock/ps1_style_low_poly_earth.glb differ