diff --git a/baselines/logoplanner/Pi3 b/baselines/logoplanner/Pi3 index 0a505af..b412c3b 160000 --- a/baselines/logoplanner/Pi3 +++ b/baselines/logoplanner/Pi3 @@ -1 +1 @@ -Subproject commit 0a505af9c261a7c8e44946fba5b1942cdc25914b +Subproject commit b412c3bd236dfd7686f1e4b48004d5087f2fa093 diff --git a/baselines/logoplanner/logoplanner_navbench_adapter.py b/baselines/logoplanner/logoplanner_navbench_adapter.py new file mode 100644 index 0000000..c80f4d5 --- /dev/null +++ b/baselines/logoplanner/logoplanner_navbench_adapter.py @@ -0,0 +1,331 @@ +"""LoGoPlanner adapter for NavArena-Bench PointNav evaluation. + +This adapter bridges LoGoPlanner's HTTP REST server (logoplanner_server.py) +with NavArena-Bench's WebSocket NavigationModelServer interface, enabling +LoGoPlanner to be evaluated on the NavArena-Bench PointNav benchmark. + +Architecture: + NavArena-Bench runner ──WebSocket──► this adapter ──HTTP──► logoplanner_server.py + │ + LoGoPlanner_Agent + (GPU inference, CUDA) + +Usage: + # Step 1 – start the LoGoPlanner HTTP server (GPU machine): + # cd NavDP/baselines/logoplanner + # python logoplanner_server.py --port 43020 --checkpoint ./logoplanner_policy.ckpt + # + # Step 2 – run this adapter (same or different machine): + # python logoplanner_navbench_adapter.py \\ + # --server-url http://127.0.0.1:43020 \\ + # --port 8000 + +Coordinate conventions: + - NavArena-Bench uses ENU world frame: yaw=0 → +X (East), CCW positive. + - Robot body frame: +X = forward, +Y = left. + - LoGoPlanner goal: (goal_x=forward, goal_y=lateral) in robot body frame. + - World → robot-body rotation (computed once at episode start): + fwd = cos(yaw0)*dx + sin(yaw0)*dy + lat = -sin(yaw0)*dx + cos(yaw0)*dy + +LoGoPlanner is designed to do implicit localization from its visual-geometry +backbone + history queues, so the goal is expressed in the episode's *starting* +body frame and kept fixed thereafter — the model infers how far it has moved +on its own. We therefore compute the body-frame goal once on the first +prediction of each episode and reuse it, matching lekiwi_logoplanner_host.py. +""" + +from __future__ import annotations + +import argparse +import io +import json +import math + +import numpy as np +import requests +from PIL import Image + +from navarena_server.server import NavigationModelServer, serve + + +class LoGoPlannerNavBenchAdapter(NavigationModelServer): + """NavArena-Bench PointNav agent backed by a running logoplanner_server.py.""" + + def __init__( + self, + server_url: str, + camera_intrinsic: list[list[float]], + stop_threshold: float = 0.3, + rgb_camera_key: str | None = None, + depth_camera_key: str | None = None, + request_timeout: float = 30.0, + ) -> None: + """ + Args: + server_url: Base URL of logoplanner_server.py, e.g. "http://127.0.0.1:43020". + camera_intrinsic: 3×3 camera intrinsic matrix as nested list [[fx,0,cx],[0,fy,cy],[0,0,1]]. + Used by LoGoPlanner's trajectory visualisation; adjust to match your scenes. + stop_threshold: Value passed to /navigator_reset; LoGoPlanner issues a rotation-only + command when max trajectory value is below this threshold. + rgb_camera_key: Key into observation["rgb"] to pick the camera image (default: first key). + depth_camera_key: Key into observation["depth"] (default: first key). + request_timeout: HTTP request timeout in seconds. + """ + self.server_url = server_url.rstrip("/") + self.camera_intrinsic = camera_intrinsic + self.stop_threshold = stop_threshold + self.rgb_camera_key = rgb_camera_key + self.depth_camera_key = depth_camera_key + self.request_timeout = request_timeout + self._goal_position: list[float] | None = None + self._goal_robot_frame: tuple[float, float] | None = None + + # ------------------------------------------------------------------ # + # Lifecycle hooks # + # ------------------------------------------------------------------ # + + async def on_episode_start(self, payload: dict, ctx) -> None: + """Reset LoGoPlanner's memory queues and cache the goal for this episode.""" + task = payload.get("task", payload) + goals = task.get("goals") or [] + if goals and isinstance(goals[0], dict) and goals[0].get("position") is not None: + self._goal_position = list(goals[0]["position"]) + else: + self._goal_position = None + self._goal_robot_frame = None + print(f"[adapter] episode_start: goal={self._goal_position}", flush=True) + + resp = requests.post( + f"{self.server_url}/navigator_reset", + json={ + "intrinsic": self.camera_intrinsic, + "stop_threshold": self.stop_threshold, + "batch_size": 1, + }, + timeout=self.request_timeout, + ) + resp.raise_for_status() + + async def on_episode_end(self, result: dict, ctx) -> None: + """Finalize the per-episode mp4 so the last episode isn't left unclosed.""" + try: + requests.post( + f"{self.server_url}/finalize", + timeout=self.request_timeout, + ) + except Exception as exc: + print(f"[adapter] finalize failed: {exc}", flush=True) + + # ------------------------------------------------------------------ # + # Prediction # + # ------------------------------------------------------------------ # + + async def predict(self, observation: dict, ctx) -> dict: + """Convert a NavBench observation to a LoGoPlanner HTTP call and return an action. + + Returns: + dict with keys {"x", "y", "yaw"}: + x – forward displacement in robot frame (metres) + y – lateral displacement in robot frame (metres, positive = left) + yaw – rotation increment (radians); always 0 for omnidirectional mode + """ + rgb_buf, depth_buf = self._encode_images(observation) + + if self._goal_robot_frame is None: + self._goal_robot_frame = self._compute_goal_robot_frame(observation) + print( + f"[adapter] initial body-frame goal " + f"=({self._goal_robot_frame[0]:+.3f},{self._goal_robot_frame[1]:+.3f}) " + f"(fixed for the rest of the episode)", + flush=True, + ) + goal_x, goal_y = self._goal_robot_frame + + traj = self._call_pointgoal_step(rgb_buf, depth_buf, goal_x, goal_y) + + # traj: list of shape (batch_size=1, predict_size, 3) + traj_arr = np.asarray(traj) + print( + f"[adapter] goal_robot=({goal_x:+.3f},{goal_y:+.3f}) " + f"traj.shape={traj_arr.shape} " + f"wp[0]={traj_arr[0, 0].round(3).tolist()} " + f"wp[4]={traj_arr[0, 4].round(3).tolist()} " + f"wp[-1]={traj_arr[0, -1].round(3).tolist()} " + f"fwd_range=[{traj_arr[0, :, 0].min():+.3f},{traj_arr[0, :, 0].max():+.3f}] " + f"lat_range=[{traj_arr[0, :, 1].min():+.3f},{traj_arr[0, :, 1].max():+.3f}]", + flush=True, + ) + + # Take the 5th waypoint of the first (only) environment. + first_wp = traj[0][4] + fwd = float(first_wp[0]) + lat = float(first_wp[1]) + yaw = float(first_wp[2]) + + return {"waypoints": [{"x": fwd, "y": lat, "yaw": yaw}]} + + # ------------------------------------------------------------------ # + # Internal helpers # + # ------------------------------------------------------------------ # + + def _encode_images(self, observation: dict) -> tuple[io.BytesIO, io.BytesIO]: + """Encode RGB and depth observations into PNG byte buffers for multipart upload.""" + # ── RGB: support both PIL.Image and numpy.ndarray ─────────────────── + rgb_dict: dict = observation.get("rgb", {}) + rgb_key = self.rgb_camera_key or next(iter(rgb_dict)) + rgb_raw = rgb_dict[rgb_key] + if isinstance(rgb_raw, Image.Image): + rgb_np = np.asarray(rgb_raw.convert("RGB")) + else: + rgb_np = np.asarray(rgb_raw) + if rgb_np.ndim == 2: + rgb_np = np.stack([rgb_np, rgb_np, rgb_np], axis=-1) + if rgb_np.shape[-1] == 4: + rgb_np = rgb_np[:, :, :3] + if rgb_np.dtype != np.uint8: + rgb_np = np.clip(rgb_np, 0, 255).astype(np.uint8) + + # NavBench observations are typically RGB; LoGoPlanner expects BGR. + rgb_bgr = rgb_np[:, :, ::-1].copy() # H×W×3 uint8, BGR + rgb_buf = io.BytesIO() + Image.fromarray(rgb_bgr, mode="RGB").save(rgb_buf, format="PNG") + rgb_buf.seek(0) + + # ── Depth: support both PIL.Image and numpy.ndarray ───────────────── + depth_dict: dict = observation.get("depth", {}) + depth_key = self.depth_camera_key or next(iter(depth_dict)) + depth_raw = depth_dict[depth_key] + depth_arr: np.ndarray = np.asarray(depth_raw, dtype=np.float32) + if depth_arr.ndim == 3: + depth_arr = depth_arr[:, :, 0] # (H, W) + depth_int = (depth_arr * 10000).astype(np.int32) # metres → 0.1 mm integer + depth_buf = io.BytesIO() + Image.fromarray(depth_int, mode="I").save(depth_buf, format="PNG") + depth_buf.seek(0) + + return rgb_buf, depth_buf + + def _compute_goal_robot_frame(self, observation: dict) -> tuple[float, float]: + """Transform the world-frame goal into the episode's *starting* body frame. + + Called exactly once per episode, on the first prediction. LoGoPlanner + handles the shift from start-frame to current-frame internally via its + implicit localization backbone. + + NavBench ENU convention (matches ROS standard): + yaw = 0 → robot faces +X (East) + yaw increases counter-clockwise + + Robot body frame: + +X = forward, +Y = left + + Rotation (world → start-body): + fwd = cos(yaw0)*dx + sin(yaw0)*dy + lat = -sin(yaw0)*dx + cos(yaw0)*dy + """ + pose = observation.get("pose", {}) + robot_pos = pose.get("position", [0.0, 0.0, 0.0]) + robot_yaw: float = pose.get("yaw", 0.0) + + if self._goal_position is not None: + goal_pos = self._goal_position + else: + goal = observation.get("goal", {}) + goal_pos = goal.get("position", [0.0, 0.0, 0.0]) + + dx = goal_pos[0] - robot_pos[0] + dy = goal_pos[1] - robot_pos[1] + + cos_y = math.cos(robot_yaw) + sin_y = math.sin(robot_yaw) + goal_fwd = cos_y * dx + sin_y * dy + goal_lat = -sin_y * dx + cos_y * dy + + return goal_fwd, goal_lat + + def _call_pointgoal_step( + self, + rgb_buf: io.BytesIO, + depth_buf: io.BytesIO, + goal_x: float, + goal_y: float, + ) -> list: + """POST to /pointgoal_step and return the parsed trajectory list.""" + files = { + "image": ("image.png", rgb_buf, "image/png"), + "depth": ("depth.png", depth_buf, "image/png"), + } + data = { + "goal_data": json.dumps({ + "goal_x": [goal_x], # list of length batch_size=1 + "goal_y": [goal_y], + }) + } + resp = requests.post( + f"{self.server_url}/pointgoal_step", + files=files, + data=data, + timeout=self.request_timeout, + ) + resp.raise_for_status() + return resp.json()["trajectory"] + + +# ──────────────────────────────────────────────────────────────────────── # +# CLI entry point # +# ──────────────────────────────────────────────────────────────────────── # + +def _build_intrinsic(fx: float, fy: float, cx: float, cy: float) -> list[list[float]]: + return [[fx, 0.0, cx], [0.0, fy, cy], [0.0, 0.0, 1.0]] + + +def main() -> None: + parser = argparse.ArgumentParser( + description="LoGoPlanner → NavArena-Bench PointNav adapter", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "--server-url", default="http://127.0.0.1:43020", + help="URL of the running logoplanner_server.py", + ) + parser.add_argument("--host", default="0.0.0.0", help="WebSocket bind host") + parser.add_argument("--port", type=int, default=8000, help="WebSocket bind port") + parser.add_argument( + "--stop-threshold", type=float, default=-10.0, + help="LoGoPlanner stop threshold forwarded to /navigator_reset", + ) + # Camera intrinsic (for LoGoPlanner trajectory visualisation). + # Default matches the NavArena-Bench gs_env default camera (fx=fy=320, cx=320, cy=240). + parser.add_argument("--fx", type=float, default=320.0, help="Camera intrinsic fx") + parser.add_argument("--fy", type=float, default=320.0, help="Camera intrinsic fy") + parser.add_argument("--cx", type=float, default=320.0, help="Camera intrinsic cx (principal point x)") + parser.add_argument("--cy", type=float, default=240.0, help="Camera intrinsic cy (principal point y)") + parser.add_argument( + "--rgb-key", default=None, + help="Camera key in observation['rgb'] (default: first available key)", + ) + parser.add_argument( + "--depth-key", default=None, + help="Camera key in observation['depth'] (default: first available key)", + ) + parser.add_argument("--timeout", type=float, default=30.0, help="HTTP request timeout (seconds)") + args = parser.parse_args() + + intrinsic = _build_intrinsic(args.fx, args.fy, args.cx, args.cy) + adapter = LoGoPlannerNavBenchAdapter( + server_url=args.server_url, + camera_intrinsic=intrinsic, + stop_threshold=args.stop_threshold, + rgb_camera_key=args.rgb_key, + depth_camera_key=args.depth_key, + request_timeout=args.timeout, + ) + + print(f"LoGoPlanner server : {args.server_url}") + print(f"NavBench WebSocket : ws://{args.host}:{args.port}") + serve(adapter, host=args.host, port=args.port) + + +if __name__ == "__main__": + main() diff --git a/baselines/logoplanner/logoplanner_server.py b/baselines/logoplanner/logoplanner_server.py index 401c41c..071a1ab 100644 --- a/baselines/logoplanner/logoplanner_server.py +++ b/baselines/logoplanner/logoplanner_server.py @@ -12,7 +12,7 @@ from PIL import Image, ImageDraw, ImageFont import argparse parser = argparse.ArgumentParser() -parser.add_argument("--port",type=int,default=8888) +parser.add_argument("--port",type=int,default=43020) parser.add_argument("--checkpoint",type=str,default="./logoplanner_policy.ckpt") args = parser.parse_known_args()[0] @@ -57,6 +57,14 @@ def logoplanner_reset_env(): logoplanner_navigator.reset_env(int(request.get_json().get('env_id'))) return jsonify({"algo":"logoplanner"}) +@app.route("/finalize",methods=['POST']) +def logoplanner_finalize(): + global logoplanner_fps_writer + if logoplanner_fps_writer is not None: + logoplanner_fps_writer.close() + logoplanner_fps_writer = None + return jsonify({"algo":"logoplanner","status":"closed"}) + @app.route("/pointgoal_step",methods=['POST']) def logoplanner_step_xy(): global logoplanner_navigator,logoplanner_fps_writer diff --git a/baselines/logoplanner/policy_network.py b/baselines/logoplanner/policy_network.py index 46e8cc3..abafe29 100644 --- a/baselines/logoplanner/policy_network.py +++ b/baselines/logoplanner/policy_network.py @@ -126,9 +126,7 @@ def predict_pointgoal_action(self,start_goal,memory_rgbd,context_rgbd,sample_num all_trajectory = torch.cumsum(naction / 4.0, dim=1) all_trajectory = all_trajectory.reshape(start_goal.shape[0],sample_num,self.predict_size,3) - trajectory_length = all_trajectory[:,:,-1,0:2].norm(dim=-1) - all_trajectory[trajectory_length < 0.5] = all_trajectory[trajectory_length < 0.5] * torch.tensor([[[0,0,1.0]]],device=all_trajectory.device) - + sorted_indices = (-critic_values).argsort(dim=1) topk_indices = sorted_indices[:,0:2] batch_indices = torch.arange(start_goal.shape[0]).unsqueeze(1).expand(-1, 2)