diff --git a/.gitignore b/.gitignore index e1da9df7..1e58d3a5 100644 --- a/.gitignore +++ b/.gitignore @@ -150,3 +150,5 @@ logs/ /results/ checkpoints internnav/model/basemodel/LongCLIP/ +.gradio/ +result/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 78dcf9a0..47e3a487 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -37,7 +37,7 @@ repos: - id: trailing-whitespace - id: check-yaml - id: end-of-file-fixer - - id: requirements-txt-fixer + # - id: requirements-txt-fixer - id: check-merge-conflict - id: fix-encoding-pragma args: ["--remove"] diff --git a/internnav/agent/base.py b/internnav/agent/base.py index 42bf6c70..a68626f6 100644 --- a/internnav/agent/base.py +++ b/internnav/agent/base.py @@ -10,10 +10,10 @@ def __init__(self, config: AgentCfg): self.config = config def step(self, obs: Dict[str, Any]): - pass + raise NotImplementedError("This function is not implemented yet.") def reset(self): - pass + raise NotImplementedError("This function is not implemented yet.") @classmethod def register(cls, agent_type: str): diff --git a/internnav/agent/rdp_agent.py b/internnav/agent/rdp_agent.py index c3dc6678..1bd1e72d 100644 --- a/internnav/agent/rdp_agent.py +++ b/internnav/agent/rdp_agent.py @@ -9,7 +9,6 @@ from internnav.configs.agent import AgentCfg from internnav.configs.model.base_encoders import ModelCfg from internnav.model import get_config, get_policy -from internnav.model.basemodel.LongCLIP.model import longclip from internnav.model.basemodel.rdp.utils import ( FixedLengthStack, compute_actions, @@ -19,7 +18,6 @@ quat_to_euler_angles, to_local_coords_batch, ) -from internnav.model.utils.bert_token import BertTokenizer from internnav.model.utils.feature_extract import ( extract_image_features, extract_instruction_tokens, @@ -67,6 +65,8 @@ def __init__(self, config: AgentCfg): if self.use_clip_encoders: if self._model_settings.text_encoder.type == 'roberta': + from internnav.model.utils.bert_token import 
BertTokenizer
+
                 self.bert_tokenizer = BertTokenizer(
                     max_length=self._model_settings.instruction_encoder.max_length,
                     load_model=self._model_settings.instruction_encoder.load_model,
@@ -74,6 +74,8 @@ def __init__(self, config: AgentCfg):
                 )
                 self.use_bert = True
             elif self._model_settings.text_encoder.type == 'clip-long':
+                from internnav.model.basemodel.LongCLIP.model import longclip
+
                 self.bert_tokenizer = longclip.tokenize
                 self.use_bert = True
                 self.is_clip_long = True
diff --git a/internnav/agent/simple_agent.py b/internnav/agent/simple_agent.py
new file mode 100644
index 00000000..54ecca18
--- /dev/null
+++ b/internnav/agent/simple_agent.py
@@ -0,0 +1,64 @@
+import time
+from typing import Any, Dict
+
+import torch
+
+from internnav.agent import Agent
+from internnav.configs.agent import AgentCfg
+from internnav.model import get_config, get_policy
+
+
+class SimpleAgent(Agent):
+    """
+    Agent template; override the conversion/inference hooks for a custom policy.
+
+    The policy class is looked up by ``model_settings.policy_name``, loaded from
+    ``ckpt_path`` and moved to CUDA device 0.
+    """
+
+    def __init__(self, agent_config: AgentCfg):
+        # The base initializer stores the config as ``self.config``, which ``step`` reads.
+        super().__init__(agent_config)
+        self.agent_config = agent_config
+        self.device = torch.device('cuda', 0)
+
+        # get policy by name
+        policy = get_policy(agent_config.model_settings.policy_name)
+
+        # load policy checkpoints
+        self.policy = policy.from_pretrained(
+            agent_config.ckpt_path,
+            config=get_config(agent_config.model_settings.policy_name)(
+                model_cfg={'model': agent_config.model_settings.model_dump()}
+            ),
+        ).to(self.device)
+
+    def convert_input(self, obs):
+        """Hook: turn an observation into policy input (identity in this template)."""
+        return obs
+
+    def convert_output(self, action):
+        """Hook: turn the policy output into an action (identity in this template)."""
+        return action
+
+    def inference(self, input):
+        """Call the loaded policy on the converted input and return its output."""
+        return self.policy(input)
+
+    def step(self, obs: Dict[str, Any]):
+        """Convert ``obs``, run the policy, convert the result, and print the elapsed time."""
+        print(f'{self.config.model_name} Agent step')
+        start = time.time()
+
+        # convert obs to model input
+        obs = self.convert_input(obs)
+        action = self.inference(obs)
+        action = self.convert_output(action)
+
+        end = time.time()
+        print(f'time: {round(end-start, 4)}s')
+        return action
+
+    def reset(self):
+        """Nothing to reset: this template keeps no per-episode state."""
+        pass
diff --git a/internnav/evaluator/utils/vln_default_config.py
b/internnav/configs/evaluator/vln_default_config.py similarity index 100% rename from internnav/evaluator/utils/vln_default_config.py rename to internnav/configs/evaluator/vln_default_config.py diff --git a/internnav/configs/trainer/__init__.py b/internnav/configs/trainer/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/internnav/env/utils/internutopia_extension/configs/sensors/__init__.py b/internnav/env/utils/internutopia_extension/configs/sensors/__init__.py new file mode 100644 index 00000000..de11a1e6 --- /dev/null +++ b/internnav/env/utils/internutopia_extension/configs/sensors/__init__.py @@ -0,0 +1 @@ +from .vln_camera import VLNCameraCfg diff --git a/internnav/env/utils/internutopia_extension/configs/tasks/__init__.py b/internnav/env/utils/internutopia_extension/configs/tasks/__init__.py new file mode 100644 index 00000000..b9ccb1eb --- /dev/null +++ b/internnav/env/utils/internutopia_extension/configs/tasks/__init__.py @@ -0,0 +1 @@ +from .vln_eval_task import VLNEvalTaskCfg diff --git a/internnav/evaluator/utils/common.py b/internnav/evaluator/utils/common.py index 5e6011b7..004801b9 100644 --- a/internnav/evaluator/utils/common.py +++ b/internnav/evaluator/utils/common.py @@ -6,7 +6,6 @@ from collections import defaultdict import numpy as np -from internutopia.core.util import is_in_container from PIL import Image, ImageDraw from scipy.ndimage import binary_dilation @@ -243,6 +242,8 @@ def load_data(dataset_root_dir, split, filter_same_trajectory=True, filter_stair def load_scene_usd(mp3d_data_dir, scan): """Load scene USD based on the scan""" + from internutopia.core.util import is_in_container + find_flag = False for root, dirs, files in os.walk(os.path.join(mp3d_data_dir, scan)): target_file_name = 'fixed_docker.usd' if is_in_container() else 'fixed.usd' diff --git a/internnav/evaluator/utils/eval.py b/internnav/evaluator/utils/eval.py index 75cc2198..9e6c4474 100644 --- a/internnav/evaluator/utils/eval.py +++ 
b/internnav/evaluator/utils/eval.py @@ -1,14 +1,4 @@ -from internutopia.core.config.robot import ControllerCfg -from internutopia_extension.configs.robots.h1 import H1RobotCfg -from internutopia_extension.configs.sensors import RepCameraCfg - from internnav.configs.evaluator import EvalCfg -from internnav.env.utils.internutopia_extension.configs.metrics.vln_pe_metrics import ( - VLNPEMetricCfg, -) -from internnav.env.utils.internutopia_extension.configs.tasks.vln_eval_task import ( - VLNEvalTaskCfg, -) from internnav.evaluator.utils.common import load_kujiale_scene_usd, load_scene_usd from internnav.projects.dataloader.resumable import ResumablePathKeyDataloader @@ -20,6 +10,16 @@ def generate_episode(dataloader: ResumablePathKeyDataloader, config: EvalCfg): path_key_data = dataloader.path_key_data episodes = [] + # lazy import + from internutopia.core.config.robot import ControllerCfg + from internutopia_extension.configs.robots.h1 import H1RobotCfg + from internutopia_extension.configs.sensors import RepCameraCfg + + from internnav.env.utils.internutopia_extension.configs.metrics import ( + VLNPEMetricCfg, + ) + from internnav.env.utils.internutopia_extension.configs.tasks import VLNEvalTaskCfg + robot = H1RobotCfg( **config.task.robot.robot_settings, controllers=[ControllerCfg(**cfg.controller_settings) for cfg in config.task.robot.controllers], diff --git a/internnav/model/__init__.py b/internnav/model/__init__.py index 9ac47edc..c30b9970 100644 --- a/internnav/model/__init__.py +++ b/internnav/model/__init__.py @@ -1,36 +1,30 @@ -import os - -import numpy as np -import torch -from gym import spaces - -from internnav.utils.common_log_util import common_logger as logger - -from .basemodel.cma.cma_clip_policy import CMA_CLIP_Net, CMACLIPModelConfig -from .basemodel.cma.cma_policy import CMAModelConfig, CMANet -from .basemodel.internvla_n1.internvla_n1_policy import ( - InternVLAN1ModelConfig, - InternVLAN1Net, -) -from .basemodel.navdp.navdp_policy import 
NavDPModelConfig, NavDPNet -from .basemodel.rdp.rdp_policy import RDPModelConfig, RDPNet -from .basemodel.seq2seq.seq2seq_policy import Seq2SeqModelConfig, Seq2SeqNet -from .utils.misc import set_cuda, set_random_seed, wrap_model -from .utils.save import load_checkpoint - - def get_policy(policy_name): if policy_name == 'CMA_CLIP_Policy': + from .basemodel.cma.cma_clip_policy import CMA_CLIP_Net, CMACLIPModelConfig + return CMA_CLIP_Net elif policy_name == 'RDP_Policy': + from .basemodel.rdp.rdp_policy import RDPNet + return RDPNet elif policy_name == 'CMA_Policy': + from .basemodel.cma.cma_policy import CMAModelConfig, CMANet + return CMANet elif policy_name == 'Seq2Seq_Policy': + from .basemodel.seq2seq.seq2seq_policy import Seq2SeqModelConfig, Seq2SeqNet + return Seq2SeqNet elif policy_name == 'InternVLAN1_Policy': + from .basemodel.internvla_n1.internvla_n1_policy import ( + InternVLAN1ModelConfig, + InternVLAN1Net, + ) + return InternVLAN1Net elif policy_name == 'NavDP_Policy': + from .basemodel.navdp.navdp_policy import NavDPNet + return NavDPNet else: raise ValueError(f'Policy {policy_name} not found') @@ -38,16 +32,31 @@ def get_policy(policy_name): def get_config(policy_name): if policy_name == 'CMA_CLIP_Policy': + from .basemodel.cma.cma_clip_policy import CMA_CLIP_Net, CMACLIPModelConfig + return CMACLIPModelConfig elif policy_name == 'RDP_Policy': + from .basemodel.rdp.rdp_policy import RDPModelConfig + return RDPModelConfig elif policy_name == 'CMA_Policy': + from .basemodel.cma.cma_policy import CMAModelConfig, CMANet + return CMAModelConfig elif policy_name == 'Seq2Seq_Policy': + from .basemodel.seq2seq.seq2seq_policy import Seq2SeqModelConfig, Seq2SeqNet + return Seq2SeqModelConfig elif policy_name == 'InternVLAN1_Policy': + from .basemodel.internvla_n1.internvla_n1_policy import ( + InternVLAN1ModelConfig, + InternVLAN1Net, + ) + return InternVLAN1ModelConfig elif policy_name == 'NavDP_Policy': + from .basemodel.navdp.navdp_policy import 
NavDPModelConfig + return NavDPModelConfig else: raise ValueError(f'Policy {policy_name} not found') diff --git a/internnav/utils/__init__.py b/internnav/utils/__init__.py new file mode 100644 index 00000000..9f1eacc9 --- /dev/null +++ b/internnav/utils/__init__.py @@ -0,0 +1,2 @@ +from .comm_utils.client import AgentClient +from .comm_utils.server import AgentServer diff --git a/internnav/utils/comm_utils/server.py b/internnav/utils/comm_utils/server.py index a2090768..2d3bf27f 100644 --- a/internnav/utils/comm_utils/server.py +++ b/internnav/utils/comm_utils/server.py @@ -1,8 +1,6 @@ #!/usr/bin/env python import base64 -import importlib.util import pickle -import sys from typing import Dict import uvicorn @@ -73,38 +71,9 @@ def _validate_agent_exists(self, agent_name: str): def run(self, reload=False): uvicorn.run( - '__main__:server.app', + self.app, host=self.host, port=self.port, reload=reload, reload_dirs=['./internnav/agent/', './internnav/model/'], ) - - -def load_eval_cfg(config_path): - spec = importlib.util.spec_from_file_location('eval_config_module', config_path) - config_module = importlib.util.module_from_spec(spec) - sys.modules['eval_config_module'] = config_module - spec.loader.exec_module(config_module) - return getattr(config_module, 'eval_cfg') - - -import argparse - -parser = argparse.ArgumentParser() -parser.add_argument('--host', type=str, default='localhost') -parser.add_argument( - '--config', - type=str, - default='scripts/eval/configs/h1_cma_cfg.py', - help='eval config file path, e.g. 
scripts/eval/configs/h1_cma_cfg.py', -) -parser.add_argument('--reload', action='store_true') -args = parser.parse_args() -eval_cfg = load_eval_cfg(args.config) -args.port = eval_cfg.agent.server_port - -server = AgentServer(args.host, args.port) - -if __name__ == '__main__': - server.run(args.reload) diff --git a/requirements/core_requirements.txt b/requirements/core_requirements.txt new file mode 100644 index 00000000..26982e3b --- /dev/null +++ b/requirements/core_requirements.txt @@ -0,0 +1,91 @@ +ansi2txt>=0.2.0,<0.3 + +# anyio: 3.x supports py3.8+, 4.x drops 3.8 +anyio>=3.7,<4 ; python_version < "3.9" +anyio>=3.7,<5 ; python_version >= "3.9" + +coloredlogs>=15.0.1,<16 +coverage>=7.5,<8 + +# FastAPI & Starlette pairing, both py>=3.8 +fastapi>=0.110,<0.111 +starlette>=0.36,<0.38 + +filelock>=3.12,<4 +fsspec>=2024.6.0,<2025.0.0 # keep roughly current but flexible + +gpustat>=1.1.1,<2 +gunicorn>=22.0.0,<23 + +# Gym stack +gym>=0.22.0,<=0.26.2 +gym-notices>=0.0.8,<0.1 +gymnasium==0.29.1 + +httpx>=0.25.2,<0.27 # 0.27+ starts tightening deps + +imageio>=2.22,<2.33 +imageio-ffmpeg>=0.6.0,<0.7 + +ipdb>=0.13.13,<0.14 + +jsonschema>=4.18,<4.25 # 4.25+ tends to drop py3.8, keep <4.25 for 3.8 compat +jsonschema-specifications>=2023.12.1,<2026.0.0 + +msgpack-numpy>=0.4.8,<0.5 + +# numpy wheels: +numpy>=1.26,<1.27 + +nvsmi>=0.4.2,<0.5 + +# opencv wheels are python- and platform-specific +opencv-python-headless>=4.9.0.80,<4.10.0.0 + +packaging>=23.0,<25 + +# Pillow: 11.x needs py>=3.10; for older python fallback to <10 +pillow<10.0.0 ; python_version < "3.10" +pillow>=11.0.0,<12.0.0 ; python_version >= "3.10" + +portalocker>=2.7.0,<3 +propcache>=0.3.2,<0.4 +psutil>=5.9.8,<6 + +pydantic>=2.11.0,<2.12 # brings its own pydantic-core + +pytest>=7.3.1,<8 +pytest-cov>=4.1.0,<5 +pytest-timeout>=2.4.0,<3 + +python-dateutil>=2.9.0,<3 +python-multipart>=0.0.9,<0.1 +PyYAML>=6.0.1,<7 + +# ray: pin only for py>=3.10 so py3.8 env can still install the rest +ray==2.47.1 ; python_version >= 
"3.10" + +referencing>=0.36.2,<0.37 +requests>=2.32.3,<3 +rich>=14.0.0,<15 + +# allow pip to choose an rpds-py that matches python ABI +rpds-py>=0.18,<0.27 + +sentry-sdk>=2.33.0,<3 +setproctitle>=1.3.6,<2 +sniffio>=1.3.1,<2 + +# starlette already declared above with fastapi; leaving here for clarity if you want, +# but don't duplicate. If you keep both, pip will just resolve. +# starlette>=0.36.3,<0.38 + +termcolor>=3.1.0,<4 +tomli>=2.0.1,<3 +tqdm>=4.67.1,<5 +typing_extensions>=4.8,<5 +tyro>=0.9.26,<0.10 + +uvicorn>=0.29.0,<0.31 + +watchdog>=3.0.0,<5 diff --git a/requirements/eval.txt b/requirements/eval.txt deleted file mode 100644 index 0d8028c2..00000000 --- a/requirements/eval.txt +++ /dev/null @@ -1,3 +0,0 @@ -ansi2txt==0.2.0 -pydantic>2.0 -requests==2.32.3 diff --git a/requirements/habitat_requirements.txt b/requirements/habitat_requirements.txt index fd2dfbe9..e69de29b 100644 --- a/requirements/habitat_requirements.txt +++ b/requirements/habitat_requirements.txt @@ -1,7 +0,0 @@ -ftfy==6.3.1 -diffusers==0.33.1 -transformers==4.51.0 --e git+https://github.com/real-stanford/diffusion_policy.git@5ba07ac6661db573af695b419a7947ecb704690f#egg=diffusion_policy -depth-camera-filtering @ git+https://github.com/naokiyokoyama/depth_camera_filtering.git -accelerate==1.4.0 -flash_attn==2.7.4.post1 \ No newline at end of file diff --git a/requirements/internvla_n1.txt b/requirements/internvla_n1.txt new file mode 100644 index 00000000..13c8a819 --- /dev/null +++ b/requirements/internvla_n1.txt @@ -0,0 +1,7 @@ +accelerate==1.4.0 +depth-camera-filtering @ git+https://github.com/naokiyokoyama/depth_camera_filtering.git +diffusers==0.33.1 +diffusion_policy @ git+https://github.com/real-stanford/diffusion_policy.git@5ba07ac6661db573af695b419a7947ecb704690f +flash_attn==2.7.4.post1 +ftfy==6.3.1 +transformers==4.51.0 diff --git a/requirements/isaac_requirements.txt b/requirements/isaac_requirements.txt index 45f9590e..34245eb0 100644 --- a/requirements/isaac_requirements.txt +++ 
b/requirements/isaac_requirements.txt @@ -1,4 +1,3 @@ -accelerate==1.7.0 addict==2.4.0 aiodns==3.1.1 aiofiles==23.2.1 @@ -15,9 +14,6 @@ asttokens==3.0.0 async-timeout==4.0.3 attrs==23.2.0 av==15.0.0 -azure-core==1.28.0 -azure-identity==1.13.0 -azure-storage-blob==12.17.0 blessed==1.21.0 cchardet==2.1.7 certifi==2024.8.30 @@ -27,13 +23,8 @@ click==8.1.7 cloudpickle==3.1.1 coloredlogs==15.0.1 construct==2.10.68 -coverage==7.4.4 cycler==0.11.0 decorator==5.2.1 -diffusers==0.33.1 -distro==1.9.0 -docstring_parser==0.16 -einops==0.8.1 exceptiongroup==1.2.2 executing==2.2.0 expandvars==1.1.1 @@ -41,10 +32,8 @@ fastapi==0.110.0 ffmpeg==1.4 filelock==3.15.4 flatbuffers==25.2.10 -flash-attn==2.7.2.post1 frozenlist==1.4.1 fsspec==2024.10.0 -ftfy==6.3.1 gitdb==4.0.12 GitPython==3.1.44 gpustat==1.1.1 @@ -53,39 +42,30 @@ gym==0.26.2 gym-notices==0.0.8 gymnasium==0.29.1 h11==0.14.0 -hf-xet==1.1.5 httpcore==1.0.8 httptools==0.6.1 httpx==0.25.2 -huggingface-hub==0.33.4 humanfriendly==10.0 idna==3.6 -idna-ssl==1.1.0 imageio==2.22.2 imageio-ffmpeg==0.6.0 importlib_metadata==8.7.0 inputs==0.5 ipdb==0.13.13 ipython==8.20.0 -isodate==0.6.1 jedi==0.19.2 Jinja2==3.1.4 -jmespath==1.0.1 jsonschema==4.24.1 jsonschema-specifications==2025.4.1 kiwisolver==1.4.4 lcm==1.5.0 -llvmlite==0.42.0 lmdb==1.6.2 lxml==5.3.0 -markdown-it-py==3.0.0 MarkupSafe==3.0.2 matplotlib==3.8.4 matplotlib-inline==0.1.7 mdurl==0.1.2 mpmath==1.3.0 -msal==1.23.0 -msal-extensions==1.0.0 msgpack==1.1.1 msgpack-numpy==0.4.8 multidict==6.0.5 @@ -98,7 +78,6 @@ nvsmi==0.4.2 oauthlib==3.2.2 onnx==1.18.0 onnxruntime==1.19.2 -openai==1.29.0 opencv-python-headless==4.9.0.80 packaging==23.0 params_proto==2.13.2 @@ -138,7 +117,6 @@ rich==14.0.0 rpds-py==0.26.0 rsl-rl-lib==2.2.1 s3transfer==0.6.1 -safetensors==0.5.3 scipy selenium==4.14.0 sentry-sdk==2.33.0 @@ -150,16 +128,12 @@ smmap==5.0.2 sniffio==1.3.1 stack-data==0.6.3 starlette==0.36.3 -sympy==1.13.1 termcolor==3.1.0 -tokenizers==0.21.2 toml==0.10.2 tomli==2.0.1 tornado==6.4.2 
tqdm==4.67.1 traitlets==5.14.3 -transformers==4.51.0 -triton==3.1.0 typeguard==4.4.4 typing-inspection==0.4.1 typing_extensions==4.14.1 @@ -169,12 +143,9 @@ uvicorn==0.29.0 vuer==0.0.32rc7 wandb==0.20.0 watchdog==4.0.0 -waterbear==2.6.8 wcwidth==0.2.13 webbot==0.34 websockets==12.0 wrapt==1.16.0 yarl==1.9.4 zipp==3.23.0 -gradio --e git+https://github.com/real-stanford/diffusion_policy.git@5ba07ac6661db573af695b419a7947ecb704690f#egg=diffusion_policy diff --git a/requirements/model_requirements.txt b/requirements/model_requirements.txt new file mode 100644 index 00000000..8a3f0a4e --- /dev/null +++ b/requirements/model_requirements.txt @@ -0,0 +1,33 @@ +accelerate==1.7.0 +azure-core==1.28.0 +azure-identity==1.13.0 +azure-storage-blob==12.17.0 +depth-camera-filtering @ git+https://github.com/naokiyokoyama/depth_camera_filtering.git +diffusers==0.33.1 +diffusion_policy @ git+https://github.com/real-stanford/diffusion_policy.git@5ba07ac6661db573af695b419a7947ecb704690f +distro==1.9.0 +docstring_parser==0.16 +einops==0.8.1 +flash_attn==2.7.2.post1 +ftfy==6.3.1 +hf-xet==1.1.5 +huggingface-hub==0.33.4 +idna-ssl==1.1.0 +isodate==0.6.1 +jmespath==1.0.1 +llvmlite==0.42.0 +markdown-it-py==3.0.0 +msal==1.23.0 +msal-extensions==1.0.0 +open3d==0.19.0 +openai==1.29.0 +pandas==2.3.1 +pyarrow==21.0.0 +s3transfer==0.6.1 +safetensors==0.5.3 +sympy==1.13.1 +tensorboard==2.20.0 +tokenizers==0.21.2 +transformers==4.51.0 +triton==3.1.0 +waterbear==2.6.8 diff --git a/requirements/test.txt b/requirements/test.txt deleted file mode 100644 index 11f4ffc8..00000000 --- a/requirements/test.txt +++ /dev/null @@ -1,5 +0,0 @@ -coverage==7.5.4 -pytest==7.3.1 -pytest-cov==4.1.0 -pytest-timeout==2.4.0 -tomli==2.0.1 diff --git a/requirements/train.txt b/requirements/train.txt deleted file mode 100644 index b20c0ebb..00000000 --- a/requirements/train.txt +++ /dev/null @@ -1,4 +0,0 @@ -pandas==2.3.1 -open3d==0.19.0 -tensorboard==2.20.0 -pyarrow==21.0.0 \ No newline at end of file diff --git 
a/scripts/eval/configs/h1_internvla_n1_cfg.py b/scripts/eval/configs/h1_internvla_n1_cfg.py index 76fb10eb..90a801cf 100644 --- a/scripts/eval/configs/h1_internvla_n1_cfg.py +++ b/scripts/eval/configs/h1_internvla_n1_cfg.py @@ -1,4 +1,3 @@ -# from scripts.eval.configs.agent import * from internnav.configs.agent import AgentCfg from internnav.configs.evaluator import ( EnvCfg, diff --git a/scripts/eval/configs/h1_rdp_cfg.py b/scripts/eval/configs/h1_rdp_cfg.py index a9ac26a1..a6380c85 100644 --- a/scripts/eval/configs/h1_rdp_cfg.py +++ b/scripts/eval/configs/h1_rdp_cfg.py @@ -9,7 +9,7 @@ eval_cfg = EvalCfg( agent=AgentCfg( - server_port=8080, + server_port=8087, model_name='rdp', ckpt_path='checkpoints/r2r/fine_tuned/rdp', model_settings={}, diff --git a/scripts/eval/eval.py b/scripts/eval/eval.py index 7cb72a38..68c507af 100644 --- a/scripts/eval/eval.py +++ b/scripts/eval/eval.py @@ -5,8 +5,8 @@ import argparse import importlib.util +from internnav.configs.evaluator.vln_default_config import get_config from internnav.evaluator import Evaluator -from internnav.evaluator.utils.vln_default_config import get_config # This file is the main file @@ -16,7 +16,7 @@ def parse_args(): parser.add_argument( "--config", type=str, - default='scripts/eval/configs/h1_cma_cfg.py', + default='scripts/eval/configs/h1_rdp_cfg.py', help='eval config file path, e.g. 
scripts/eval/configs/h1_cma_cfg.py', ) return parser.parse_args() diff --git a/scripts/eval/eval_habitat.py b/scripts/eval/eval_habitat.py index 27bf04a3..e78a8d6f 100644 --- a/scripts/eval/eval_habitat.py +++ b/scripts/eval/eval_habitat.py @@ -1,6 +1,9 @@ import argparse import json import os +import sys + +sys.path.append('./src/diffusion-policy') import numpy as np import torch diff --git a/scripts/eval/start_server.py b/scripts/eval/start_server.py index eb001127..5a03cd73 100644 --- a/scripts/eval/start_server.py +++ b/scripts/eval/start_server.py @@ -2,6 +2,7 @@ import sys sys.path.append('.') +sys.path.append('./src/diffusion-policy') import argparse import glob @@ -10,7 +11,7 @@ import os import sys -from internnav.utils.comm_utils.server import AgentServer +from internnav.utils import AgentServer # import all agents to register them @@ -22,9 +23,7 @@ def auto_register_agents(agent_dir: str): for module in agent_modules: if not module.endswith('__init__.py'): # Avoid importing __init__.py itself module_name = os.path.basename(module)[:-3] # Remove the .py extension - importlib.import_module( - f'internnav_baselines.agents.{module_name}' - ) # Replace 'agents' with your module's package + importlib.import_module(f'internnav.agent.{module_name}') # Replace 'agents' with your module's package def load_eval_cfg(config_path, attr_name='eval_cfg'): @@ -39,7 +38,7 @@ def load_eval_cfg(config_path, attr_name='eval_cfg'): print("Starting Agent Server...") print("Registering agents...") - auto_register_agents('internnav_baselines/agents') + auto_register_agents('internnav/agent') parser = argparse.ArgumentParser() parser.add_argument('--host', type=str, default='localhost') diff --git a/scripts/iros_challenge/eval_iros.py b/scripts/iros_challenge/eval_iros.py index 8924bf1b..cdb02861 100644 --- a/scripts/iros_challenge/eval_iros.py +++ b/scripts/iros_challenge/eval_iros.py @@ -5,8 +5,7 @@ import argparse import importlib.util -from evaluator.utils.vln_default_config 
import get_config - +from internnav.configs.evaluator.vln_default_config import get_config from internnav.evaluator import Evaluator # This file is the main file diff --git a/scripts/iros_challenge/onsite_competition/captures/rs_depth_mm.png b/scripts/iros_challenge/onsite_competition/captures/rs_depth_mm.png new file mode 100644 index 00000000..7c46c358 Binary files /dev/null and b/scripts/iros_challenge/onsite_competition/captures/rs_depth_mm.png differ diff --git a/scripts/iros_challenge/onsite_competition/captures/rs_depth_vis.png b/scripts/iros_challenge/onsite_competition/captures/rs_depth_vis.png new file mode 100644 index 00000000..85a629b8 Binary files /dev/null and b/scripts/iros_challenge/onsite_competition/captures/rs_depth_vis.png differ diff --git a/scripts/iros_challenge/onsite_competition/sdk/save_obs.py b/scripts/iros_challenge/onsite_competition/sdk/save_obs.py index adb1d8bd..82dc8b54 100644 --- a/scripts/iros_challenge/onsite_competition/sdk/save_obs.py +++ b/scripts/iros_challenge/onsite_competition/sdk/save_obs.py @@ -105,6 +105,7 @@ def load_obs_from_meta(meta_path: str, nan_for_zeros: bool = False) -> Dict: paths = meta.get("paths", {}) rgb_path = _resolve(base, paths.get("rgb")) depth_mm_path = _resolve(base, paths.get("depth_mm")) + print(rgb_path, depth_mm_path) # 读 RGB(保存时就是 BGR,OpenCV 读回来仍是 BGR) rgb = None @@ -133,6 +134,7 @@ def load_obs_from_meta(meta_path: str, nan_for_zeros: bool = False) -> Dict: "timestamp_s": float(meta.get("timestamp_s", 0.0)), "intrinsics": meta.get("intrinsics", {}), } + print(obs) return obs diff --git a/scripts/iros_challenge/onsite_competition/sdk/test_agent.py b/scripts/iros_challenge/onsite_competition/sdk/test_agent.py index deeeb798..0a10211c 100644 --- a/scripts/iros_challenge/onsite_competition/sdk/test_agent.py +++ b/scripts/iros_challenge/onsite_competition/sdk/test_agent.py @@ -1,11 +1,23 @@ import importlib.util import sys +sys.path.append('.') +sys.path.append('./src/diffusion-policy/') + + 
import numpy as np -from evaluator.utils.vln_default_config import get_config from save_obs import load_obs_from_meta -from internnav.agent.utils.client import AgentClient +# from internnav.configs.evaluator.vln_default_config import get_config +from internnav.configs.agent import AgentCfg +from internnav.configs.evaluator import ( + EnvCfg, + EvalCfg, + EvalDatasetCfg, + SceneCfg, + TaskCfg, +) +from internnav.utils.comm_utils.client import AgentClient def load_eval_cfg(config_path, attr_name='eval_cfg'): @@ -18,8 +30,70 @@ def load_eval_cfg(config_path, attr_name='eval_cfg'): # test if agent behave normally with fake observation def test_agent(cfg_path=None, obs=None): - cfg = load_eval_cfg(cfg_path, attr_name='eval_cfg') - cfg = get_config(cfg) + # cfg = load_eval_cfg(cfg_path, attr_name='eval_cfg') + # cfg = get_config(cfg) + cfg = EvalCfg( + agent=AgentCfg( + server_host='localhost', + server_port=8087, + model_name='internvla_n1', + ckpt_path='', + model_settings={ + 'policy_name': "InternVLAN1_Policy", + 'state_encoder': None, + 'env_num': 1, + 'sim_num': 1, + 'model_path': "checkpoints/InternVLA-N1", + 'camera_intrinsic': [[585.0, 0.0, 320.0], [0.0, 585.0, 240.0], [0.0, 0.0, 1.0]], + 'width': 640, + 'height': 480, + 'hfov': 79, + 'resize_w': 384, + 'resize_h': 384, + 'max_new_tokens': 1024, + 'num_frames': 32, + 'num_history': 8, + 'num_future_steps': 4, + 'device': 'cuda:0', + 'predict_step_nums': 32, + 'continuous_traj': True, + # debug + 'vis_debug': True, # If vis_debug=True, you can get visualization results + 'vis_debug_path': './logs/test/vis_debug', + }, + ), + env=EnvCfg( + env_type='internutopia', + env_settings={ + 'use_fabric': False, + 'headless': True, + }, + ), + task=TaskCfg( + task_name='cma_kujiale_eval', + task_settings={ + 'env_num': 2, + 'use_distributed': True, + 'proc_num': 4, + }, + scene=SceneCfg( + scene_type='kujiale', + scene_data_dir='interiornav_data/scene_data', + ), + robot_name='h1', + 
robot_usd_path='data/Embodiments/vln-pe/h1/h1_vln_pointcloud.usd', + camera_resolution=[256, 256], # (W,H) + camera_prim_path='torso_link/h1_pano_camera_0', + ), + dataset=EvalDatasetCfg( + dataset_type="kujiale", + dataset_settings={ + 'base_data_dir': 'interiornav_data/raw_data', + 'split_data_types': ['val_unseen', 'val_seen'], + 'filter_stairs': False, + }, + ), + ) agent = AgentClient(cfg.agent) for _ in range(10): @@ -32,7 +106,7 @@ def test_agent(cfg_path=None, obs=None): # use your own path # cfg_path = '/root/InternNav/scripts/eval/configs/h1_rdp_cfg.py' cfg_path = '/root/InternNav/scripts/eval/configs/h1_internvla_n1_cfg.py' - rs_meta_path = 'challenge/onsite_competition/captures/rs_meta.json' + rs_meta_path = 'scripts/iros_challenge/onsite_competition/captures/rs_meta.json' fake_obs_256 = { 'rgb': np.zeros((256, 256, 3), dtype=np.uint8), @@ -44,8 +118,8 @@ def test_agent(cfg_path=None, obs=None): print(fake_obs_640['rgb'].shape, fake_obs_640['depth'].shape) sim_obs = { - 'rgb': np.load('challenge/onsite_competition/captures/sim_rgb.npy'), - 'depth': np.load('challenge/onsite_competition/captures/sim_depth.npy'), + 'rgb': np.load('scripts/iros_challenge/onsite_competition/captures/sim_rgb.npy'), + 'depth': np.load('scripts/iros_challenge/onsite_competition/captures/sim_depth.npy'), } print(sim_obs['rgb'].shape, sim_obs['depth'].shape) # dtype (uint8 and float32) and value range # TODO: crop to 256,256, test with fake_obs_256 diff --git a/setup.py b/setup.py index 6b964a23..2256e165 100644 --- a/setup.py +++ b/setup.py @@ -38,9 +38,18 @@ def parse_readme(readme: str) -> str: long_description = io.open(readme_filepath, 'r', encoding='utf-8').read() long_description = parse_readme(long_description) -with open('requirements/eval.txt', 'r') as f: +with open('requirements/core_requirements.txt', 'r') as f: install_requires = f.read().splitlines() +with open('requirements/model_requirements.txt', 'r') as f: + model_requires = f.read().splitlines() + +with 
open('requirements/isaac_requirements.txt', 'r') as f: + isaac_requires = f.read().splitlines() + +with open('requirements/internvla_n1.txt', 'r') as f: + n1_requires = f.read().splitlines() + setuptools.setup( name='internnav', version='0.0.1', @@ -48,7 +57,7 @@ def parse_readme(readme: str) -> str: author='OpenRobotLab', author_email='OpenRobotLab@pjlab.org.cn', license='Apache 2.0', - readme='README.md', + # readme='README.md', description='InternNav: A benchmark evaluation framework for navigation tasks', long_description=long_description, long_description_content_type='text/markdown', @@ -63,4 +72,18 @@ def parse_readme(readme: str) -> str: ], install_requires=install_requires, include_package_data=True, + extras_require={ + # envs + "isaac": isaac_requires, + "habitat": [], + "demo": [ + "gradio==5.45", + "hf-xet==1.1.5", + "huggingface-hub==0.33.4", + ], + # models + "internvla_n1": n1_requires, + "baseline": model_requires, + "model": model_requires, + }, ) diff --git a/tests/function_test/test_evaluator.py b/tests/function_test/test_evaluator.py index b6658e40..155a3b2c 100644 --- a/tests/function_test/test_evaluator.py +++ b/tests/function_test/test_evaluator.py @@ -29,8 +29,8 @@ SceneCfg, TaskCfg, ) +from internnav.configs.evaluator.vln_default_config import get_config from internnav.evaluator import Evaluator -from internnav.evaluator.utils.vln_default_config import get_config eval_cfg = EvalCfg( agent=AgentCfg(