Changes from all commits
35 commits
4b5363a
pass single process test
Mar 31, 2024
ce3c005
Your commit message
Apr 16, 2024
d97f8fc
did some changes
Apr 17, 2024
9ec5728
fixed the issues
Apr 17, 2024
2cf206c
Applied changes from diff and resolved conflicts
Apr 18, 2024
241636f
sb3
AdrianHuang2002 Apr 27, 2024
e3391c2
refine sb3
AdrianHuang2002 Apr 29, 2024
d2f1450
refine sb3
AdrianHuang2002 Apr 29, 2024
e402274
Model Architecture
AdrianHuang2002 Apr 29, 2024
b061c00
Model Architecture and sb3_procgen_plr refine
AdrianHuang2002 Apr 29, 2024
9d30b18
Model Architecture and sb3_procgen_plr refine
AdrianHuang2002 Apr 29, 2024
5700585
Add files via upload
AdrianHuang2002 Apr 29, 2024
b660323
Model Architecture modify
AdrianHuang2002 Apr 30, 2024
9f903dc
Merge branch 'sb3-progen-plr' of github.com:AdrianHuang2002/Syllabus …
AdrianHuang2002 Apr 30, 2024
37d29c3
Model Architecture modify
AdrianHuang2002 Apr 30, 2024
906a55b
Model Architecture modify
AdrianHuang2002 May 3, 2024
979876e
Add SB3 Agent code
RyanNavillus May 6, 2024
e8bfba0
Model Architecture completed version
AdrianHuang2002 May 8, 2024
d3ce33d
changed init_weights method
AdrianHuang2002 May 8, 2024
27b80aa
changed init_weights method
AdrianHuang2002 May 9, 2024
e85eaff
changed init_weights method
AdrianHuang2002 May 9, 2024
48ca132
value_net weight update
AdrianHuang2002 May 10, 2024
3330f15
init_weights update and change in CustomCallback
AdrianHuang2002 May 12, 2024
1bb55b9
init_weights update and change in CustomCallback _on_step function
AdrianHuang2002 May 14, 2024
02daa40
init_weights update and change in CustomCallback _on_step function
AdrianHuang2002 May 14, 2024
3a93293
changes in CustomCallback _on_step function
AdrianHuang2002 May 18, 2024
c8b5ae6
Merge branch 'main' into sb3-progen-plr
RyanNavillus May 18, 2024
143d59f
Testing eval changes
RyanNavillus May 18, 2024
d899f04
Fix tasks for PLR update
RyanNavillus May 18, 2024
e13b8e4
sb3-procgen-plr final version
AdrianHuang2002 May 20, 2024
0712d48
Update curriculum_base.py
May 31, 2024
c4de462
changes in SequentialCurriculum
AdrianHuang2002 Jul 6, 2024
11f1c5b
a bit reduce to the overlapped part for condition ‘call’ function and…
AdrianHuang2002 Jul 9, 2024
04066da
Local changes before merge
AdrianHuang2002 Jul 17, 2024
acb5097
modification for custom_metric
AdrianHuang2002 Jul 25, 2024
Binary file added .DS_Store
Binary file not shown.
Binary file added profiling_results.prof
Binary file not shown.
Binary file added syllabus/.DS_Store
Binary file not shown.
Binary file added syllabus/core/.DS_Store
Binary file not shown.
95 changes: 73 additions & 22 deletions syllabus/core/curriculum_base.py
@@ -3,9 +3,10 @@
from typing import Any, Callable, List, Tuple, Union

import numpy as np
from gymnasium.spaces import Dict

from gymnasium.spaces import Dict, Box
import random
from syllabus.task_space import TaskSpace
from itertools import product
from .stat_recorder import StatRecorder


@@ -14,25 +15,36 @@ class Curriculum:
"""Base class and API for defining curricula to interface with Gym environments.
"""

def __init__(self, task_space: TaskSpace, random_start_tasks: int = 0, task_names: Callable = None, record_stats: bool = False) -> None:
def __init__(self, task_space: TaskSpace, random_start_tasks: int = 0, task_names: Callable = None, record_stats: bool = False, warmup_strategy: str = None, warmup_samples: int = 0) -> None:
"""Initialize the base Curriculum

:param task_space: the environment's task space from which new tasks are sampled
TODO: Implement this in a way that works with any curriculum, maybe as a wrapper
:param random_start_tasks: Number of uniform random tasks to sample before using the algorithm's sample method, defaults to 0
TODO: Use task space for this
:param random_start_tasks: Number of tasks to sample randomly at the start, defaults to 0
:param task_names: Names of the tasks in the task space, defaults to None
:param record_stats: Boolean to indicate if statistics should be recorded, defaults to False
:param warmup_strategy: Strategy for warmup, defaults to None
:param warmup_samples: Number of warmup samples, defaults to 0
"""
assert isinstance(task_space, TaskSpace), f"task_space must be a TaskSpace object. Got {type(task_space)} instead."
self.task_space = task_space
self.random_start_tasks = random_start_tasks
self.completed_tasks = 0
self.task_names = task_names if task_names is not None else lambda task, idx: idx
self.n_updates = 0
self.startup_sampled_tasks = 0
self.warmup_strategy = warmup_strategy
self.warmup_tasks = warmup_samples
self.fix_curr_index = 0
self.stat_recorder = StatRecorder(self.task_space, task_names=task_names) if record_stats else None

if self.num_tasks == 0:
if warmup_strategy == "fix" and isinstance(self.task_space.gym_space, Box):
self.fix_box_space = self._initialize_fixed_grid()

if self.num_tasks is None:
warnings.warn("Task space is continuous. Number of warmup tasks can't be compared to the task space size.")
elif self.num_tasks == 0:
warnings.warn("Task space is empty. This will cause errors during sampling if no tasks are added.")
elif warmup_samples > self.num_tasks:
warnings.warn("Number of warmup tasks is larger than task space, some tasks will be replayed during warmup.")

@property
def requires_step_updates(self) -> bool:
@@ -182,14 +194,47 @@ def _sample_distribution(self) -> List[float]:
Any curriculum that maintains a true probability distribution should implement this method to retrieve it.
"""
raise NotImplementedError

def _initialize_fixed_grid(self):
dims = self.task_space.gym_space.shape[0]
samples_per_dim = int(round(pow(self.warmup_tasks, 1 / dims)))
ranges = [np.linspace(self.task_space.gym_space.low[i], self.task_space.gym_space.high[i], samples_per_dim)
for i in range(dims)]
all_points = list(product(*ranges))
sampled_tasks = [tuple(point) for point in all_points]

return sampled_tasks

def _should_use_startup_sampling(self) -> bool:
return self.warmup_strategy != "none" and self.startup_sampled_tasks < self.warmup_tasks

def _startup_sample(self, k: int) -> List:
sampled_tasks = []

if isinstance(self.task_space.gym_space, Box):
if self.warmup_strategy == "fix":
sampled_tasks = self.fix_box_space
self.fix_curr_index = (self.fix_curr_index + self.warmup_tasks) % len(sampled_tasks)
elif self.warmup_strategy == "random":
sampled_tasks = [self.task_space.gym_space.sample() for _ in range(k)]

else:
if self.warmup_strategy == "fix":
if self.fix_curr_index + k > self.num_tasks:
sampled_tasks = self.tasks[self.fix_curr_index:self.num_tasks]
self.fix_curr_index = self.fix_curr_index + k - self.num_tasks
sampled_tasks.extend(self.tasks[0:(self.fix_curr_index)])
else:
sampled_tasks = self.tasks[self.fix_curr_index:self.fix_curr_index + k]
self.fix_curr_index += k

def _should_use_startup_sampling(self) -> bool:
return self.random_start_tasks > 0 and self.completed_tasks < self.random_start_tasks

def _startup_sample(self) -> List:
task_dist = [0.0 / self.num_tasks for _ in range(self.num_tasks)]
task_dist[0] = 1.0
return task_dist
elif self.warmup_strategy == "random":
# Allows sampling with replacement, making duplicates possible if k > num_tasks.
indices = random.choices(range(self.num_tasks), k=k)
sampled_tasks = [self.tasks[idx] for idx in indices]
self.startup_sampled_tasks += k
return sampled_tasks

def sample(self, k: int = 1) -> Union[List, Any]:
"""Sample k tasks from the curriculum.
@@ -200,14 +245,20 @@ def sample(self, k: int = 1) -> Union[List, Any]:
# assert self.num_tasks > 0, "Task space is empty. Please add tasks to the curriculum before sampling."

if self._should_use_startup_sampling():
return self._startup_sample()

# Use list of indices because np.choice does not play nice with tuple tasks
# tasks = self.tasks
n_tasks = self.num_tasks
tasks = self._startup_sample(k)
# Check if the startup sampling has satisfied the request or if there's no progress (no tasks returned)
if len(tasks) > 0 and len(tasks) < k: # Check if we need to add more tasks
additional_tasks = self.sample(k=k-len(tasks))
tasks.extend(additional_tasks)
return tasks

task_dist = self._sample_distribution()
task_idx = np.random.choice(list(range(n_tasks)), size=k, p=task_dist)
return task_idx

# Normal sampling process
tasks = self.tasks
n_tasks = len(tasks)
task_idx = np.random.choice(range(n_tasks), size=k, p=task_dist)
return [tasks[i] for i in task_idx]

def log_metrics(self, writer, step=None, log_full_dist=False):
"""Log the task distribution to the provided tensorboard writer.
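A note on the warmup machinery added above: for Box task spaces, the "fix" strategy precomputes an evenly spaced lattice with roughly warmup_samples ** (1 / dims) points per dimension, so the actual grid size is samples_per_dim ** dims and can differ from warmup_samples when it is not a perfect power. (The _should_use_startup_sampling/_startup_sample pair shown mid-hunk is the deleted version; the elif "random" branch continues the new method.) The reworked sample() then tops up any warmup shortfall by recursing into normal sampling. A minimal standalone sketch of the grid construction, with made-up 2-D bounds:

from itertools import product

import numpy as np
from gymnasium.spaces import Box

# Hypothetical 2-D Box task space (e.g. two difficulty parameters).
space = Box(low=np.array([0.0, 1.0]), high=np.array([1.0, 10.0]))
warmup_samples = 9  # requested number of warmup tasks

dims = space.shape[0]
samples_per_dim = int(round(pow(warmup_samples, 1 / dims)))  # 9 over 2 dims -> 3
ranges = [
    np.linspace(space.low[i], space.high[i], samples_per_dim)
    for i in range(dims)
]
grid = [tuple(point) for point in product(*ranges)]
print(len(grid))  # 9: the 3x3 lattice spanning the Box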
3 changes: 2 additions & 1 deletion syllabus/core/environment_sync_wrapper.py
@@ -87,6 +87,7 @@ def reset(self, *args, **kwargs):

def step(self, action):
obs, rew, term, trunc, info = step_api_compatibility(self.env.step(action), output_truncation_bool=True)
info["task"] = self.task_space.encode(self.get_task())
self.episode_length += 1
self.episode_return += rew
self.task_progress = info.get("task_completion", 0.0)
@@ -491,4 +492,4 @@ def add_task(self, task):
def __getattr__(self, attr):
env_attr = getattr(self.env, attr, None)
if env_attr:
return env_attr
return env_attr
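The single added line in step() stamps every transition with its encoded task, so downstream consumers no longer need a side channel to know which task produced a step. A hypothetical consumer, assuming a Gymnasium-style 5-tuple step API and an env already wrapped by this sync wrapper:

# Hypothetical per-task step counter built from the injected info["task"].
obs, _ = env.reset()
steps_per_task = {}
for _ in range(1000):
    obs, rew, term, trunc, info = env.step(env.action_space.sample())
    task = info["task"]  # encoded task id written by the wrapper each step
    steps_per_task[task] = steps_per_task.get(task, 0) + 1
    if term or trunc:
        obs, _ = env.reset()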
Binary file added syllabus/curricula/.DS_Store
Binary file not shown.
3 changes: 3 additions & 0 deletions syllabus/curricula/annealing_box.py
@@ -48,6 +48,9 @@ def sample(self, k: int = 1) -> Union[List, Any]:
Sample k tasks from the curriculum.
"""
# Linear annealing from start_values to end_values
if self._should_use_startup_sampling():
return self._startup_sample(k)

annealed_values = (
self.start_values + (self.end_values - self.start_values) *
np.minimum(self.current_step, self.total_steps) / self.total_steps
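The new guard diverts to warmup sampling before any annealing happens; the rest of sample() is unchanged. For reference, the annealed range it computes is a linear interpolation clamped at total_steps, as this toy recomputation with illustrative values shows:

import numpy as np

start_values = np.array([0.0, 1.0])  # illustrative Box range at the start
end_values = np.array([0.0, 10.0])   # illustrative Box range at the end
total_steps = 1000

for current_step in (0, 500, 2000):
    annealed_values = (
        start_values + (end_values - start_values)
        * np.minimum(current_step, total_steps) / total_steps
    )
    print(current_step, annealed_values)
# 0    -> [0. 1.]   start values
# 500  -> [0. 5.5]  halfway through the anneal
# 2000 -> [0. 10.]  clamped once current_step exceeds total_steps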
6 changes: 3 additions & 3 deletions syllabus/curricula/plr/central_plr_wrapper.py
@@ -215,9 +215,9 @@ def _sample_distribution(self) -> List[float]:
def sample(self, k: int = 1) -> Union[List, Any]:
self.num_samples += 1
if self._should_use_startup_sampling():
return self._startup_sample()
else:
return [self._task_sampler.sample() for _ in range(k)]
return self._startup_sample(k)

return [self._task_sampler.sample() for _ in range(k)]

def _enumerate_tasks(self, space):
assert isinstance(space, Discrete) or isinstance(space, MultiDiscrete), f"Unsupported task space {space}: Expected Discrete or MultiDiscrete"
6 changes: 3 additions & 3 deletions syllabus/curricula/plr/plr_wrapper.py
@@ -216,9 +216,9 @@ def _sample_distribution(self) -> List[float]:

def sample(self, k: int = 1) -> Union[List, Any]:
if self._should_use_startup_sampling():
return self._startup_sample()
else:
return [self._task_sampler.sample() for _ in range(k)]
return self._startup_sample(k)

return [self._task_sampler.sample() for _ in range(k)]

def update_on_step(self, task, obs, rew, term, trunc, info, env_id: int = None) -> None:
"""
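The same edit lands in both central_plr_wrapper.py and plr_wrapper.py, and it also changes what the warmup branch returns: the deleted _startup_sample() produced a one-hot probability distribution, whereas _startup_sample(k) returns k concrete tasks, matching the list the task-sampler path yields. A stand-in comparison:

# Old warmup branch (deleted): a distribution over tasks, not tasks.
num_tasks = 4
task_dist = [0.0] * num_tasks
task_dist[0] = 1.0  # -> [1.0, 0.0, 0.0, 0.0]

# New warmup branch: k tasks, the same type as the task-sampler path.
tasks = ["lvl_0", "lvl_1", "lvl_2", "lvl_3"]  # placeholder task names
k = 2
sampled = tasks[:k]  # e.g. what the "fix" strategy's cursor would serve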
2 changes: 2 additions & 0 deletions syllabus/curricula/plr/task_sampler.py
@@ -311,6 +311,8 @@ def sample_weights(self):
self.staleness_temperature,
self.task_staleness,
)
if np.isclose(np.sum(staleness_weights), 0):
staleness_weights = np.ones_like(staleness_weights, dtype=float) / len(staleness_weights)
staleness_weights = staleness_weights * (1 - self.unseen_task_weights)
z = np.sum(staleness_weights)
if z > 0:
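The two added lines guard a degenerate case: if every staleness weight is numerically zero, the normalization that follows would divide by zero, so the vector is first replaced with a uniform distribution. A standalone numeric sketch of the fallback (omitting the unseen-task masking that follows in the real method):

import numpy as np

staleness_weights = np.zeros(4)  # degenerate case: no task is stale yet

# Fallback from this diff: swap an all-zero vector for a uniform one so
# the normalization below never divides by zero.
if np.isclose(np.sum(staleness_weights), 0):
    staleness_weights = np.ones_like(staleness_weights, dtype=float) / len(staleness_weights)

z = np.sum(staleness_weights)
if z > 0:
    staleness_weights = staleness_weights / z
print(staleness_weights)  # [0.25 0.25 0.25 0.25]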
41 changes: 23 additions & 18 deletions syllabus/curricula/sequential.py
@@ -15,13 +15,15 @@ class SequentialCurriculum(Curriculum):
REQUIRES_EPISODE_UPDATES = True
REQUIRES_CENTRAL_UPDATES = False

def __init__(self, curriculum_list: List[Curriculum], stopping_conditions: List[Any], *curriculum_args, return_buffer_size: int = 1000, **curriculum_kwargs):
def __init__(self, curriculum_list: List[Curriculum], stopping_conditions: List[Any], *curriculum_args, custom_metrics: dict = None, return_buffer_size: int = 1000, **curriculum_kwargs):
super().__init__(*curriculum_args, **curriculum_kwargs)
assert len(curriculum_list) > 0, "Must provide at least one curriculum"
assert len(stopping_conditions) == len(curriculum_list) - 1, f"Stopping conditions must be one less than the number of curricula. Final curriculum is used for the remainder of training. Expected {len(curriculum_list) - 1}, got {len(stopping_conditions)}."
if len(curriculum_list) == 1:
warnings.warn("Your sequential curriculum only containes one element. Consider using that element directly instead.")
warnings.warn("Your sequential curriculum only contains one element. Consider using that element directly instead.")

self.custom_metrics = custom_metrics if custom_metrics is not None else {} # Stores the functions that can calculate the data during training
self.metric_values = {metric: None for metric in self.custom_metrics.keys()} # Initialize the metric values dictionary with None
self.curriculum_list = self._parse_curriculum_list(curriculum_list)
self.stopping_conditions = self._parse_stopping_conditions(stopping_conditions)
self._curriculum_index = 0
@@ -83,23 +85,25 @@ def _parse_condition_string(self, condition: str) -> Callable:

try:
metric, comparator, value = clauses

if metric == "steps":
metric_fn = self._get_steps
elif metric == "total_steps":
metric_fn = self._get_total_steps
elif metric == "episodes":
metric_fn = self._get_episodes
elif metric == "total_episodes":
metric_fn = self._get_total_episodes
elif metric == "tasks":
metric_fn = self._get_tasks
elif metric == "total_tasks":
metric_fn = self._get_total_tasks
elif metric == "episode_return":
metric_fn = self._get_episode_return
if metric in self.metric_values:
metric_fn = lambda: self.metric_values[metric]
else:
raise ValueError(f"Invalid metric name: {metric}")
if metric == "steps":
metric_fn = self._get_steps
elif metric == "total_steps":
metric_fn = self._get_total_steps
elif metric == "episodes":
metric_fn = self._get_episodes
elif metric == "total_episodes":
metric_fn = self._get_total_episodes
elif metric == "tasks":
metric_fn = self._get_tasks
elif metric == "total_tasks":
metric_fn = self._get_total_tasks
elif metric == "episode_return":
metric_fn = self._get_episode_return
else:
raise ValueError(f"Invalid metric name: {metric}")

if comparator == '<':
return lambda: metric_fn() < float(value)
Expand Down Expand Up @@ -164,6 +168,7 @@ def sample(self, k: int = 1) -> Union[List, Any]:
return recoded_tasks

def update_on_episode(self, episode_return, episode_len, episode_task, env_id=None):
print(self.metric_values)
self.n_episodes += 1
self.total_episodes += 1
self.n_steps += episode_len
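With custom_metrics, a stopping condition can name a user-supplied metric: _parse_condition_string now checks self.metric_values before falling back to the built-in metric names. A hypothetical configuration; the two stage curricula are placeholders, the ">=" comparator is an assumption (only "<" is visible in this hunk), and how metric_values gets refreshed during training is not shown in this diff:

# Hypothetical metric, keyed by the same name used in the condition string.
def mean_success_rate(infos):
    # Placeholder: derive a success rate from recent environment infos.
    return sum(i.get("success", 0.0) for i in infos) / max(len(infos), 1)

curriculum = SequentialCurriculum(
    curriculum_list=[easy_curriculum, hard_curriculum],  # placeholder stages
    stopping_conditions=["mean_success_rate>=0.9"],      # resolved via metric_values
    custom_metrics={"mean_success_rate": mean_success_rate},
)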
15 changes: 4 additions & 11 deletions syllabus/curricula/simple_box.py
@@ -60,14 +60,7 @@ def sample(self, k: int = 1) -> Union[List, Any]:
"""
Sample k tasks from the curriculum.
"""
return [self.max_range for _ in range(k)]

def log_metrics(self, writer, step=None):
print("Logging", flush=True)
try:
import wandb
writer.log({"range_min": self.max_range[0]}, step=step)
writer.log({"range_max": self.max_range[1]}, step=step)

except ImportError:
pass
if self._should_use_startup_sampling():
return self._startup_sample(k)

return [self.max_range for _ in range(k)]
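Net effect across these files: every curriculum's sample() consults the shared warmup path first. A hypothetical end-to-end construction; the TaskSpace and SimpleBoxCurriculum signatures and the import path are assumptions, with only the warmup kwargs coming from this PR's Curriculum.__init__:

import numpy as np
from gymnasium.spaces import Box

from syllabus.task_space import TaskSpace
from syllabus.curricula import SimpleBoxCurriculum  # assumed import path

# Illustrative 1-D Box task space wrapped as a Syllabus TaskSpace.
task_space = TaskSpace(Box(low=np.array([0.0]), high=np.array([1.0])))

curriculum = SimpleBoxCurriculum(
    task_space,
    warmup_strategy="fix",  # evenly spaced grid over the Box
    warmup_samples=8,       # serve 8 warmup tasks before normal sampling
)
first_batch = curriculum.sample(k=4)  # drawn from the warmup grid first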