Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -150,3 +150,5 @@ logs/
/results/
checkpoints
internnav/model/basemodel/LongCLIP/
.gradio/
result/
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ repos:
- id: trailing-whitespace
- id: check-yaml
- id: end-of-file-fixer
- id: requirements-txt-fixer
# - id: requirements-txt-fixer
- id: check-merge-conflict
- id: fix-encoding-pragma
args: ["--remove"]
Expand Down
4 changes: 2 additions & 2 deletions internnav/agent/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ def __init__(self, config: AgentCfg):
self.config = config

def step(self, obs: Dict[str, Any]):
    """Handle one observation; placeholder that concrete agents must override.

    Args:
        obs: Observation mapping for the current step.

    Raises:
        NotImplementedError: Always; the base class provides no behaviour.
    """
    raise NotImplementedError("This function is not implemented yet.")

def reset(self):
    """Reset the agent; placeholder that concrete agents must override.

    Raises:
        NotImplementedError: Always; the base class provides no behaviour.
    """
    raise NotImplementedError("This function is not implemented yet.")

@classmethod
def register(cls, agent_type: str):
Expand Down
6 changes: 4 additions & 2 deletions internnav/agent/rdp_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from internnav.configs.agent import AgentCfg
from internnav.configs.model.base_encoders import ModelCfg
from internnav.model import get_config, get_policy
from internnav.model.basemodel.LongCLIP.model import longclip
from internnav.model.basemodel.rdp.utils import (
FixedLengthStack,
compute_actions,
Expand All @@ -19,7 +18,6 @@
quat_to_euler_angles,
to_local_coords_batch,
)
from internnav.model.utils.bert_token import BertTokenizer
from internnav.model.utils.feature_extract import (
extract_image_features,
extract_instruction_tokens,
Expand Down Expand Up @@ -67,13 +65,17 @@ def __init__(self, config: AgentCfg):

if self.use_clip_encoders:
if self._model_settings.text_encoder.type == 'roberta':
from internnav.model.utils.bert_token import BertTokenizer

self.bert_tokenizer = BertTokenizer(
max_length=self._model_settings.instruction_encoder.max_length,
load_model=self._model_settings.instruction_encoder.load_model,
device=self.device,
)
self.use_bert = True
elif self._model_settings.text_encoder.type == 'clip-long':
from internnav.model.basemodel.LongCLIP.model import longclip

self.bert_tokenizer = longclip.tokenize
self.use_bert = True
self.is_clip_long = True
Expand Down
54 changes: 54 additions & 0 deletions internnav/agent/simple_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import time
from typing import Any, Dict

import torch

from internnav.agent import Agent
from internnav.configs.agent import AgentCfg
from internnav.model import get_config, get_policy


class SimpleAgent(Agent):
    """Minimal agent template; override the hook methods for a custom policy.

    ``step`` runs ``convert_input`` -> ``inference`` -> ``convert_output``.
    The default hooks pass data through unchanged and call the loaded policy
    directly.
    """

    def __init__(self, agent_config: AgentCfg):
        """Load the policy named in ``agent_config`` and move it to the device.

        Args:
            agent_config: Agent configuration; ``model_settings.policy_name``
                selects the policy and ``ckpt_path`` the checkpoint to load.
        """
        # The base constructor stores the config as ``self.config``, which
        # ``step`` reads; without this call ``step`` raises AttributeError.
        super().__init__(agent_config)
        self.agent_config = agent_config
        # Prefer the first GPU, but stay usable on CPU-only machines.
        if torch.cuda.is_available():
            self.device = torch.device('cuda', 0)
        else:
            self.device = torch.device('cpu')

        # get policy by name
        policy_name = agent_config.model_settings.policy_name
        policy = get_policy(policy_name)

        # load policy checkpoints
        policy_config = get_config(policy_name)(
            model_cfg={'model': agent_config.model_settings.model_dump()}
        )
        self.policy = policy.from_pretrained(
            agent_config.ckpt_path,
            config=policy_config,
        ).to(self.device)

    def convert_input(self, obs):
        """Turn a raw observation into model input (identity by default)."""
        return obs

    def convert_output(self, action):
        """Turn the model output into an action (identity by default)."""
        return action

    def inference(self, input):
        """Run the loaded policy on ``input`` and return its output."""
        return self.policy(input)

    def step(self, obs: Dict[str, Any]):
        """Compute the action for one observation and print the elapsed time.

        Args:
            obs: Observation mapping for the current step.

        Returns:
            The result of ``convert_output`` applied to the policy output.
        """
        print(f'{self.config.model_name} Agent step')
        # perf_counter is monotonic, so the duration cannot go negative if the
        # system clock is adjusted mid-step.
        start = time.perf_counter()

        # convert obs to model input
        obs = self.convert_input(obs)
        action = self.inference(obs)
        action = self.convert_output(action)

        end = time.perf_counter()
        print(f'time: {round(end-start, 4)}s')
        return action

    def reset(self):
        """Nothing to reset: this template keeps no per-episode state."""
        pass
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .vln_camera import VLNCameraCfg
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .vln_eval_task import VLNEvalTaskCfg
3 changes: 2 additions & 1 deletion internnav/evaluator/utils/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from collections import defaultdict

import numpy as np
from internutopia.core.util import is_in_container
from PIL import Image, ImageDraw
from scipy.ndimage import binary_dilation

Expand Down Expand Up @@ -243,6 +242,8 @@ def load_data(dataset_root_dir, split, filter_same_trajectory=True, filter_stair

def load_scene_usd(mp3d_data_dir, scan):
"""Load scene USD based on the scan"""
from internutopia.core.util import is_in_container

find_flag = False
for root, dirs, files in os.walk(os.path.join(mp3d_data_dir, scan)):
target_file_name = 'fixed_docker.usd' if is_in_container() else 'fixed.usd'
Expand Down
20 changes: 10 additions & 10 deletions internnav/evaluator/utils/eval.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,4 @@
from internutopia.core.config.robot import ControllerCfg
from internutopia_extension.configs.robots.h1 import H1RobotCfg
from internutopia_extension.configs.sensors import RepCameraCfg

from internnav.configs.evaluator import EvalCfg
from internnav.env.utils.internutopia_extension.configs.metrics.vln_pe_metrics import (
VLNPEMetricCfg,
)
from internnav.env.utils.internutopia_extension.configs.tasks.vln_eval_task import (
VLNEvalTaskCfg,
)
from internnav.evaluator.utils.common import load_kujiale_scene_usd, load_scene_usd
from internnav.projects.dataloader.resumable import ResumablePathKeyDataloader

Expand All @@ -20,6 +10,16 @@ def generate_episode(dataloader: ResumablePathKeyDataloader, config: EvalCfg):
path_key_data = dataloader.path_key_data
episodes = []

# lazy import
from internutopia.core.config.robot import ControllerCfg
from internutopia_extension.configs.robots.h1 import H1RobotCfg
from internutopia_extension.configs.sensors import RepCameraCfg

from internnav.env.utils.internutopia_extension.configs.metrics import (
VLNPEMetricCfg,
)
from internnav.env.utils.internutopia_extension.configs.tasks import VLNEvalTaskCfg

robot = H1RobotCfg(
**config.task.robot.robot_settings,
controllers=[ControllerCfg(**cfg.controller_settings) for cfg in config.task.robot.controllers],
Expand Down
51 changes: 30 additions & 21 deletions internnav/model/__init__.py
Original file line number Diff line number Diff line change
@@ -1,53 +1,62 @@
import os

import numpy as np
import torch
from gym import spaces

from internnav.utils.common_log_util import common_logger as logger

from .basemodel.cma.cma_clip_policy import CMA_CLIP_Net, CMACLIPModelConfig
from .basemodel.cma.cma_policy import CMAModelConfig, CMANet
from .basemodel.internvla_n1.internvla_n1_policy import (
InternVLAN1ModelConfig,
InternVLAN1Net,
)
from .basemodel.navdp.navdp_policy import NavDPModelConfig, NavDPNet
from .basemodel.rdp.rdp_policy import RDPModelConfig, RDPNet
from .basemodel.seq2seq.seq2seq_policy import Seq2SeqModelConfig, Seq2SeqNet
from .utils.misc import set_cuda, set_random_seed, wrap_model
from .utils.save import load_checkpoint


def get_policy(policy_name):
    """Return the policy network class registered under ``policy_name``.

    Each policy module is imported inside its own branch, so only the module
    of the requested policy is loaded.

    Args:
        policy_name: One of 'CMA_CLIP_Policy', 'RDP_Policy', 'CMA_Policy',
            'Seq2Seq_Policy', 'InternVLAN1_Policy' or 'NavDP_Policy'.

    Returns:
        The network class for the named policy.

    Raises:
        ValueError: If ``policy_name`` is not one of the known policies.
    """
    if policy_name == 'CMA_CLIP_Policy':
        from .basemodel.cma.cma_clip_policy import CMA_CLIP_Net

        return CMA_CLIP_Net
    elif policy_name == 'RDP_Policy':
        from .basemodel.rdp.rdp_policy import RDPNet

        return RDPNet
    elif policy_name == 'CMA_Policy':
        from .basemodel.cma.cma_policy import CMANet

        return CMANet
    elif policy_name == 'Seq2Seq_Policy':
        from .basemodel.seq2seq.seq2seq_policy import Seq2SeqNet

        return Seq2SeqNet
    elif policy_name == 'InternVLAN1_Policy':
        from .basemodel.internvla_n1.internvla_n1_policy import InternVLAN1Net

        return InternVLAN1Net
    elif policy_name == 'NavDP_Policy':
        from .basemodel.navdp.navdp_policy import NavDPNet

        return NavDPNet
    else:
        raise ValueError(f'Policy {policy_name} not found')


def get_config(policy_name):
    """Return the model-config class registered under ``policy_name``.

    Each policy module is imported inside its own branch, so only the module
    of the requested policy is loaded.

    Args:
        policy_name: One of 'CMA_CLIP_Policy', 'RDP_Policy', 'CMA_Policy',
            'Seq2Seq_Policy', 'InternVLAN1_Policy' or 'NavDP_Policy'.

    Returns:
        The config class for the named policy.

    Raises:
        ValueError: If ``policy_name`` is not one of the known policies.
    """
    if policy_name == 'CMA_CLIP_Policy':
        from .basemodel.cma.cma_clip_policy import CMACLIPModelConfig

        return CMACLIPModelConfig
    elif policy_name == 'RDP_Policy':
        from .basemodel.rdp.rdp_policy import RDPModelConfig

        return RDPModelConfig
    elif policy_name == 'CMA_Policy':
        from .basemodel.cma.cma_policy import CMAModelConfig

        return CMAModelConfig
    elif policy_name == 'Seq2Seq_Policy':
        from .basemodel.seq2seq.seq2seq_policy import Seq2SeqModelConfig

        return Seq2SeqModelConfig
    elif policy_name == 'InternVLAN1_Policy':
        from .basemodel.internvla_n1.internvla_n1_policy import InternVLAN1ModelConfig

        return InternVLAN1ModelConfig
    elif policy_name == 'NavDP_Policy':
        from .basemodel.navdp.navdp_policy import NavDPModelConfig

        return NavDPModelConfig
    else:
        raise ValueError(f'Policy {policy_name} not found')
2 changes: 2 additions & 0 deletions internnav/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from .comm_utils.client import AgentClient
from .comm_utils.server import AgentServer
33 changes: 1 addition & 32 deletions internnav/utils/comm_utils/server.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#!/usr/bin/env python
import base64
import importlib.util
import pickle
import sys
from typing import Dict

import uvicorn
Expand Down Expand Up @@ -73,38 +71,9 @@ def _validate_agent_exists(self, agent_name: str):

def run(self, reload=False):
    """Serve ``self.app`` with uvicorn on ``self.host`` / ``self.port``.

    Args:
        reload: Request uvicorn auto-reload. uvicorn only supports reload
            when the application is given as an import string; because an
            app instance is passed here, the request is ignored with a
            warning instead of letting uvicorn exit.
    """
    import warnings

    if reload:
        warnings.warn(
            'reload is not supported when serving an app instance; starting without reload',
            RuntimeWarning,
            stacklevel=2,
        )
    # Exactly one application argument: the FastAPI app owned by this server.
    uvicorn.run(
        self.app,
        host=self.host,
        port=self.port,
    )


def load_eval_cfg(config_path):
    """Execute a Python config file and return its module-level ``eval_cfg``.

    The file is loaded as a module registered in ``sys.modules`` under the
    name 'eval_config_module'.

    Args:
        config_path: Path to the Python file that defines ``eval_cfg``.

    Returns:
        The ``eval_cfg`` object defined by the config file.

    Raises:
        ImportError: If no loader can be found for ``config_path`` (for
            example, the path does not have a Python source suffix).
        AttributeError: If the file does not define ``eval_cfg``.
    """
    spec = importlib.util.spec_from_file_location('eval_config_module', config_path)
    # spec_from_file_location returns None when no loader matches the path.
    if spec is None or spec.loader is None:
        raise ImportError(f'Cannot load eval config from {config_path!r}')
    config_module = importlib.util.module_from_spec(spec)
    sys.modules['eval_config_module'] = config_module
    try:
        spec.loader.exec_module(config_module)
    except BaseException:
        # Do not leave a half-initialised module registered after a failure.
        sys.modules.pop('eval_config_module', None)
        raise
    return config_module.eval_cfg


# Command-line entry point for the agent server.
# NOTE: everything down to the ``server = ...`` line is outside the
# ``__main__`` guard, so argument parsing and config loading also run whenever
# this module is imported.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--host', type=str, default='localhost')
parser.add_argument(
'--config',
type=str,
default='scripts/eval/configs/h1_cma_cfg.py',
help='eval config file path, e.g. scripts/eval/configs/h1_cma_cfg.py',
)
parser.add_argument('--reload', action='store_true')
args = parser.parse_args()
# The port is not a CLI option; it is read from the eval config's
# ``agent.server_port`` and stored on ``args``.
eval_cfg = load_eval_cfg(args.config)
args.port = eval_cfg.agent.server_port

# Module-level server instance built from the parsed host and config port.
server = AgentServer(args.host, args.port)

# Only start serving when the file is executed as a script.
if __name__ == '__main__':
server.run(args.reload)
91 changes: 91 additions & 0 deletions requirements/core_requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
ansi2txt>=0.2.0,<0.3

# anyio: 3.x supports py3.8+, 4.x drops 3.8
anyio>=3.7,<4 ; python_version < "3.9"
anyio>=3.7,<5 ; python_version >= "3.9"

coloredlogs>=15.0.1,<16
coverage>=7.5,<8

# FastAPI & Starlette pairing, both py>=3.8
fastapi>=0.110,<0.111
starlette>=0.36,<0.38

filelock>=3.12,<4
fsspec>=2024.6.0,<2025.0.0 # keep roughly current but flexible

gpustat>=1.1.1,<2
gunicorn>=22.0.0,<23

# Gym stack
gym>=0.22.0,<=0.26.2
gym-notices>=0.0.8,<0.1
gymnasium==0.29.1

httpx>=0.25.2,<0.27 # 0.27+ starts tightening deps

imageio>=2.22,<2.33
imageio-ffmpeg>=0.6.0,<0.7

ipdb>=0.13.13,<0.14

jsonschema>=4.18,<4.25 # 4.25+ tends to drop py3.8, keep <4.25 for 3.8 compat
jsonschema-specifications>=2023.12.1,<2026.0.0

msgpack-numpy>=0.4.8,<0.5

# numpy: 1.26.x wheels require Python >= 3.9
numpy>=1.26,<1.27

nvsmi>=0.4.2,<0.5

# opencv wheels are python- and platform-specific
opencv-python-headless>=4.9.0.80,<4.10.0.0

packaging>=23.0,<25

# Pillow: 11.x needs py>=3.10; for older python fallback to <10
pillow<10.0.0 ; python_version < "3.10"
pillow>=11.0.0,<12.0.0 ; python_version >= "3.10"

portalocker>=2.7.0,<3
propcache>=0.3.2,<0.4
psutil>=5.9.8,<6

pydantic>=2.11.0,<2.12 # brings its own pydantic-core

pytest>=7.3.1,<8
pytest-cov>=4.1.0,<5
pytest-timeout>=2.4.0,<3

python-dateutil>=2.9.0,<3
python-multipart>=0.0.9,<0.1
PyYAML>=6.0.1,<7

# ray: pin only for py>=3.10 so py3.8 env can still install the rest
ray==2.47.1 ; python_version >= "3.10"

referencing>=0.36.2,<0.37
requests>=2.32.3,<3
rich>=14.0.0,<15

# allow pip to choose an rpds-py that matches python ABI
rpds-py>=0.18,<0.27

sentry-sdk>=2.33.0,<3
setproctitle>=1.3.6,<2
sniffio>=1.3.1,<2

# starlette is already declared above, next to fastapi. The line below is kept
# commented out for reference only; do not re-enable it as a duplicate entry.
# starlette>=0.36.3,<0.38

termcolor>=3.1.0,<4
tomli>=2.0.1,<3
tqdm>=4.67.1,<5
typing_extensions>=4.8,<5
tyro>=0.9.26,<0.10

uvicorn>=0.29.0,<0.31

watchdog>=3.0.0,<5
Loading