Changes from all commits
35 commits
4b5363a
pass single process test
Mar 31, 2024
ce3c005
Your commit message
Apr 16, 2024
d97f8fc
did some changes
Apr 17, 2024
9ec5728
fixed the issues
Apr 17, 2024
2cf206c
Applied changes from diff and resolved conflicts
Apr 18, 2024
241636f
sb3
AdrianHuang2002 Apr 27, 2024
e3391c2
refine sb3
AdrianHuang2002 Apr 29, 2024
d2f1450
refine sb3
AdrianHuang2002 Apr 29, 2024
e402274
Model Architecture
AdrianHuang2002 Apr 29, 2024
b061c00
Model Architecture and sb3_procgen_plr refine
AdrianHuang2002 Apr 29, 2024
9d30b18
Model Architecture and sb3_procgen_plr refine
AdrianHuang2002 Apr 29, 2024
5700585
Add files via upload
AdrianHuang2002 Apr 29, 2024
b660323
Model Architecture modify
AdrianHuang2002 Apr 30, 2024
9f903dc
Merge branch 'sb3-progen-plr' of github.com:AdrianHuang2002/Syllabus …
AdrianHuang2002 Apr 30, 2024
37d29c3
Model Architecture modify
AdrianHuang2002 Apr 30, 2024
906a55b
Model Architecture modify
AdrianHuang2002 May 3, 2024
979876e
Add SB3 Agent code
RyanNavillus May 6, 2024
e8bfba0
Model Architecture completed version
AdrianHuang2002 May 8, 2024
d3ce33d
changed init_weights method
AdrianHuang2002 May 8, 2024
27b80aa
changed init_weights method
AdrianHuang2002 May 9, 2024
e85eaff
changed init_weights method
AdrianHuang2002 May 9, 2024
48ca132
value_net weight update
AdrianHuang2002 May 10, 2024
3330f15
init_weights update and change in CustomCallback
AdrianHuang2002 May 12, 2024
1bb55b9
init_weights update and change in CustomCallback _on_step function
AdrianHuang2002 May 14, 2024
02daa40
init_weights update and change in CustomCallback _on_step function
AdrianHuang2002 May 14, 2024
3a93293
changes in CustomCallback _on_step function
AdrianHuang2002 May 18, 2024
c8b5ae6
Merge branch 'main' into sb3-progen-plr
RyanNavillus May 18, 2024
143d59f
Testing eval changes
RyanNavillus May 18, 2024
d899f04
Fix tasks for PLR update
RyanNavillus May 18, 2024
e13b8e4
sb3-procgen-plr final version
AdrianHuang2002 May 20, 2024
0712d48
Update curriculum_base.py
May 31, 2024
c4de462
changes in SequentialCurriculum
AdrianHuang2002 Jul 6, 2024
11f1c5b
a bit reduce to the overlapped part for condition ‘call’ function and…
AdrianHuang2002 Jul 9, 2024
04066da
Local changes before merge
AdrianHuang2002 Jul 17, 2024
acb5097
modification for custom_metric
AdrianHuang2002 Jul 25, 2024
Binary file added .DS_Store
Binary file not shown.
Binary file added profiling_results.prof
Binary file not shown.
Binary file added syllabus/.DS_Store
Binary file not shown.
Binary file added syllabus/core/.DS_Store
Binary file not shown.
95 changes: 73 additions & 22 deletions syllabus/core/curriculum_base.py
@@ -3,9 +3,10 @@
from typing import Any, Callable, List, Tuple, Union

import numpy as np
from gymnasium.spaces import Dict

from gymnasium.spaces import Dict, Box
import random
from syllabus.task_space import TaskSpace
from itertools import product
from .stat_recorder import StatRecorder


@@ -14,25 +15,36 @@ class Curriculum:
"""Base class and API for defining curricula to interface with Gym environments.
"""

def __init__(self, task_space: TaskSpace, random_start_tasks: int = 0, task_names: Callable = None, record_stats: bool = False) -> None:
def __init__(self, task_space: TaskSpace, random_start_tasks: int = 0, task_names: Callable = None, record_stats: bool = False, warmup_strategy: str = None, warmup_samples: int = 0) -> None:
"""Initialize the base Curriculum

:param task_space: the environment's task space from which new tasks are sampled
TODO: Implement this in a way that works with any curriculum, maybe as a wrapper
:param random_start_tasks: Number of uniform random tasks to sample before using the algorithm's sample method, defaults to 0
TODO: Use task space for this
:param random_start_tasks: Number of tasks to sample randomly at the start, defaults to 0
:param task_names: Names of the tasks in the task space, defaults to None
:param record_stats: Boolean to indicate if statistics should be recorded, defaults to False
:param warmup_strategy: Strategy for warmup, defaults to None
:param warmup_samples: Number of warmup samples, defaults to 0
"""
assert isinstance(task_space, TaskSpace), f"task_space must be a TaskSpace object. Got {type(task_space)} instead."
self.task_space = task_space
self.random_start_tasks = random_start_tasks
self.completed_tasks = 0
self.task_names = task_names if task_names is not None else lambda task, idx: idx
self.n_updates = 0
self.startup_sampled_tasks = 0
self.warmup_strategy = warmup_strategy
self.warmup_tasks = warmup_samples
self.fix_curr_index = 0
self.stat_recorder = StatRecorder(self.task_space, task_names=task_names) if record_stats else None

if self.num_tasks == 0:
if warmup_strategy == "fix" and isinstance(self.task_space.gym_space, Box):
self.fix_box_space = self._initialize_fixed_grid()

if self.num_tasks is None:
warnings.warn("Task space is continuous. Number of warmup tasks can't be compared to the task space size.")
elif self.num_tasks == 0:
warnings.warn("Task space is empty. This will cause errors during sampling if no tasks are added.")
elif warmup_samples > self.num_tasks:
warnings.warn("Number of warmup tasks is larger than task space, some tasks will be replayed during warmup.")

@property
def requires_step_updates(self) -> bool:
@@ -182,14 +194,47 @@ def _sample_distribution(self) -> List[float]:
Any curriculum that maintains a true probability distribution should implement this method to retrieve it.
"""
raise NotImplementedError

def _initialize_fixed_grid(self):
dims = self.task_space.gym_space.shape[0]
samples_per_dim = int(round(pow(self.warmup_tasks, 1 / dims)))
ranges = [np.linspace(self.task_space.gym_space.low[i], self.task_space.gym_space.high[i], samples_per_dim)
for i in range(dims)]
all_points = list(product(*ranges))
sampled_tasks = [tuple(point) for point in all_points]

return sampled_tasks

def _should_use_startup_sampling(self) -> bool:
return self.warmup_strategy != "none" and self.startup_sampled_tasks < self.warmup_tasks

def _startup_sample(self, k: int) -> List:
sampled_tasks = []

if isinstance(self.task_space.gym_space, Box):
if self.warmup_strategy == "fix":
sampled_tasks = self.fix_box_space
self.fix_curr_index = (self.fix_curr_index + self.warmup_tasks) % len(sampled_tasks)
elif self.warmup_strategy == "random":
sampled_tasks = [self.task_space.gym_space.sample() for _ in range(k)]

else:
if self.warmup_strategy == "fix":
if self.fix_curr_index + k > self.num_tasks:
sampled_tasks = self.tasks[self.fix_curr_index:self.num_tasks]
self.fix_curr_index = self.fix_curr_index + k - self.num_tasks
sampled_tasks.extend(self.tasks[0:(self.fix_curr_index)])
else:
sampled_tasks = self.tasks[self.fix_curr_index:self.fix_curr_index + k]
self.fix_curr_index += k

def _should_use_startup_sampling(self) -> bool:
return self.random_start_tasks > 0 and self.completed_tasks < self.random_start_tasks

def _startup_sample(self) -> List:
task_dist = [0.0 / self.num_tasks for _ in range(self.num_tasks)]
task_dist[0] = 1.0
return task_dist
elif self.warmup_strategy == "random":
# Allows sampling with replacement, making duplicates possible if k > num_tasks.
indices = random.choices(range(self.num_tasks), k=k)
sampled_tasks = [self.tasks[idx] for idx in indices]
self.startup_sampled_tasks += k
return sampled_tasks

def sample(self, k: int = 1) -> Union[List, Any]:
"""Sample k tasks from the curriculum.
@@ -200,14 +245,20 @@ def sample(self, k: int = 1) -> Union[List, Any]:
# assert self.num_tasks > 0, "Task space is empty. Please add tasks to the curriculum before sampling."

if self._should_use_startup_sampling():
return self._startup_sample()

# Use list of indices because np.choice does not play nice with tuple tasks
# tasks = self.tasks
n_tasks = self.num_tasks
tasks = self._startup_sample(k)
# Check if the startup sampling has satisfied the request or if there's no progress (no tasks returned)
if len(tasks) > 0 and len(tasks) < k: # Check if we need to add more tasks
additional_tasks = self.sample(k=k-len(tasks))
tasks.extend(additional_tasks)
return tasks

task_dist = self._sample_distribution()
task_idx = np.random.choice(list(range(n_tasks)), size=k, p=task_dist)
return task_idx

# Normal sampling process
tasks = self.tasks
n_tasks = len(tasks)
task_idx = np.random.choice(range(n_tasks), size=k, p=task_dist)
return [tasks[i] for i in task_idx]

def log_metrics(self, writer, step=None, log_full_dist=False):
"""Log the task distribution to the provided tensorboard writer.
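A note on the warmup machinery added above: for Box task spaces, the "fix" strategy precomputes an evenly spaced lattice with roughly warmup_samples ** (1 / dims) points per dimension, so the actual grid size is samples_per_dim ** dims and can differ from warmup_samples when it is not a perfect power. (The _should_use_startup_sampling/_startup_sample pair shown mid-hunk is the deleted version; the elif "random" branch continues the new method.) The reworked sample() then tops up any warmup shortfall by recursing into normal sampling. A minimal standalone sketch of the grid construction, with made-up 2-D bounds:

from itertools import product

import numpy as np
from gymnasium.spaces import Box

# Hypothetical 2-D Box task space (e.g. two difficulty parameters).
space = Box(low=np.array([0.0, 1.0]), high=np.array([1.0, 10.0]))
warmup_samples = 9  # requested number of warmup tasks

dims = space.shape[0]
samples_per_dim = int(round(pow(warmup_samples, 1 / dims)))  # 9 over 2 dims -> 3
ranges = [
    np.linspace(space.low[i], space.high[i], samples_per_dim)
    for i in range(dims)
]
grid = [tuple(point) for point in product(*ranges)]
print(len(grid))  # 9: the 3x3 lattice spanning the Box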
3 changes: 2 additions & 1 deletion syllabus/core/environment_sync_wrapper.py
@@ -87,6 +87,7 @@ def reset(self, *args, **kwargs):

def step(self, action):
obs, rew, term, trunc, info = step_api_compatibility(self.env.step(action), output_truncation_bool=True)
info["task"] = self.task_space.encode(self.get_task())
self.episode_length += 1
self.episode_return += rew
self.task_progress = info.get("task_completion", 0.0)
@@ -491,4 +492,4 @@ def add_task(self, task):
def __getattr__(self, attr):
env_attr = getattr(self.env, attr, None)
if env_attr:
return env_attr
return env_attr
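The single added line in step() stamps every transition with its encoded task, so downstream consumers no longer need a side channel to know which task produced a step. A hypothetical consumer, assuming a Gymnasium-style 5-tuple step API and an env already wrapped by this sync wrapper:

# Hypothetical per-task step counter built from the injected info["task"].
obs, _ = env.reset()
steps_per_task = {}
for _ in range(1000):
    obs, rew, term, trunc, info = env.step(env.action_space.sample())
    task = info["task"]  # encoded task id written by the wrapper each step
    steps_per_task[task] = steps_per_task.get(task, 0) + 1
    if term or trunc:
        obs, _ = env.reset()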
Binary file added syllabus/curricula/.DS_Store
Binary file not shown.
3 changes: 3 additions & 0 deletions syllabus/curricula/annealing_box.py
@@ -48,6 +48,9 @@ def sample(self, k: int = 1) -> Union[List, Any]:
Sample k tasks from the curriculum.
"""
# Linear annealing from start_values to end_values
if self._should_use_startup_sampling():
return self._startup_sample(k)

annealed_values = (
self.start_values + (self.end_values - self.start_values) *
np.minimum(self.current_step, self.total_steps) / self.total_steps
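The new guard diverts to warmup sampling before any annealing happens; the rest of sample() is unchanged. For reference, the annealed range it computes is a linear interpolation clamped at total_steps, as this toy recomputation with illustrative values shows:

import numpy as np

start_values = np.array([0.0, 1.0])  # illustrative Box range at the start
end_values = np.array([0.0, 10.0])   # illustrative Box range at the end
total_steps = 1000

for current_step in (0, 500, 2000):
    annealed_values = (
        start_values + (end_values - start_values)
        * np.minimum(current_step, total_steps) / total_steps
    )
    print(current_step, annealed_values)
# 0    -> [0. 1.]   start values
# 500  -> [0. 5.5]  halfway through the anneal
# 2000 -> [0. 10.]  clamped once current_step exceeds total_steps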
6 changes: 3 additions & 3 deletions syllabus/curricula/plr/central_plr_wrapper.py
@@ -215,9 +215,9 @@ def _sample_distribution(self) -> List[float]:
def sample(self, k: int = 1) -> Union[List, Any]:
self.num_samples += 1
if self._should_use_startup_sampling():
return self._startup_sample()
else:
return [self._task_sampler.sample() for _ in range(k)]
return self._startup_sample(k)

return [self._task_sampler.sample() for _ in range(k)]

def _enumerate_tasks(self, space):
assert isinstance(space, Discrete) or isinstance(space, MultiDiscrete), f"Unsupported task space {space}: Expected Discrete or MultiDiscrete"
6 changes: 3 additions & 3 deletions syllabus/curricula/plr/plr_wrapper.py
@@ -216,9 +216,9 @@ def _sample_distribution(self) -> List[float]:

def sample(self, k: int = 1) -> Union[List, Any]:
if self._should_use_startup_sampling():
return self._startup_sample()
else:
return [self._task_sampler.sample() for _ in range(k)]
return self._startup_sample(k)

return [self._task_sampler.sample() for _ in range(k)]

def update_on_step(self, task, obs, rew, term, trunc, info, env_id: int = None) -> None:
"""
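The same edit lands in both central_plr_wrapper.py and plr_wrapper.py, and it also changes what the warmup branch returns: the deleted _startup_sample() produced a one-hot probability distribution, whereas _startup_sample(k) returns k concrete tasks, matching the list the task-sampler path yields. A stand-in comparison:

# Old warmup branch (deleted): a distribution over tasks, not tasks.
num_tasks = 4
task_dist = [0.0] * num_tasks
task_dist[0] = 1.0  # -> [1.0, 0.0, 0.0, 0.0]

# New warmup branch: k tasks, the same type as the task-sampler path.
tasks = ["lvl_0", "lvl_1", "lvl_2", "lvl_3"]  # placeholder task names
k = 2
sampled = tasks[:k]  # e.g. what the "fix" strategy's cursor would serve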
2 changes: 2 additions & 0 deletions syllabus/curricula/plr/task_sampler.py
@@ -311,6 +311,8 @@ def sample_weights(self):
self.staleness_temperature,
self.task_staleness,
)
if np.isclose(np.sum(staleness_weights), 0):
staleness_weights = np.ones_like(staleness_weights, dtype=float) / len(staleness_weights)
staleness_weights = staleness_weights * (1 - self.unseen_task_weights)
z = np.sum(staleness_weights)
if z > 0:
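The two added lines guard a degenerate case: if every staleness weight is numerically zero, the normalization that follows would divide by zero, so the vector is first replaced with a uniform distribution. A standalone numeric sketch of the fallback (omitting the unseen-task masking that follows in the real method):

import numpy as np

staleness_weights = np.zeros(4)  # degenerate case: no task is stale yet

# Fallback from this diff: swap an all-zero vector for a uniform one so
# the normalization below never divides by zero.
if np.isclose(np.sum(staleness_weights), 0):
    staleness_weights = np.ones_like(staleness_weights, dtype=float) / len(staleness_weights)

z = np.sum(staleness_weights)
if z > 0:
    staleness_weights = staleness_weights / z
print(staleness_weights)  # [0.25 0.25 0.25 0.25]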
41 changes: 23 additions & 18 deletions syllabus/curricula/sequential.py
@@ -15,13 +15,15 @@ class SequentialCurriculum(Curriculum):
REQUIRES_EPISODE_UPDATES = True
REQUIRES_CENTRAL_UPDATES = False

def __init__(self, curriculum_list: List[Curriculum], stopping_conditions: List[Any], *curriculum_args, return_buffer_size: int = 1000, **curriculum_kwargs):
def __init__(self, curriculum_list: List[Curriculum], stopping_conditions: List[Any], *curriculum_args, custom_metrics: dict = None, return_buffer_size: int = 1000, **curriculum_kwargs):
super().__init__(*curriculum_args, **curriculum_kwargs)
assert len(curriculum_list) > 0, "Must provide at least one curriculum"
assert len(stopping_conditions) == len(curriculum_list) - 1, f"Stopping conditions must be one less than the number of curricula. Final curriculum is used for the remainder of training. Expected {len(curriculum_list) - 1}, got {len(stopping_conditions)}."
if len(curriculum_list) == 1:
warnings.warn("Your sequential curriculum only containes one element. Consider using that element directly instead.")
warnings.warn("Your sequential curriculum only contains one element. Consider using that element directly instead.")

self.custom_metrics = custom_metrics if custom_metrics is not None else {} # Stores the functions that can calculate the data during training
self.metric_values = {metric: None for metric in self.custom_metrics.keys()} # Initialize the metric values dictionary with None
self.curriculum_list = self._parse_curriculum_list(curriculum_list)
self.stopping_conditions = self._parse_stopping_conditions(stopping_conditions)
self._curriculum_index = 0
@@ -83,23 +85,25 @@ def _parse_condition_string(self, condition: str) -> Callable:

try:
metric, comparator, value = clauses

if metric == "steps":
metric_fn = self._get_steps
elif metric == "total_steps":
metric_fn = self._get_total_steps
elif metric == "episodes":
metric_fn = self._get_episodes
elif metric == "total_episodes":
metric_fn = self._get_total_episodes
elif metric == "tasks":
metric_fn = self._get_tasks
elif metric == "total_tasks":
metric_fn = self._get_total_tasks
elif metric == "episode_return":
metric_fn = self._get_episode_return
if metric in self.metric_values:
metric_fn = lambda: self.metric_values[metric]
else:
raise ValueError(f"Invalid metric name: {metric}")
if metric == "steps":
metric_fn = self._get_steps
elif metric == "total_steps":
metric_fn = self._get_total_steps
elif metric == "episodes":
metric_fn = self._get_episodes
elif metric == "total_episodes":
metric_fn = self._get_total_episodes
elif metric == "tasks":
metric_fn = self._get_tasks
elif metric == "total_tasks":
metric_fn = self._get_total_tasks
elif metric == "episode_return":
metric_fn = self._get_episode_return
else:
raise ValueError(f"Invalid metric name: {metric}")

if comparator == '<':
return lambda: metric_fn() < float(value)
Expand Down Expand Up @@ -164,6 +168,7 @@ def sample(self, k: int = 1) -> Union[List, Any]:
return recoded_tasks

def update_on_episode(self, episode_return, episode_len, episode_task, env_id=None):
print(self.metric_values)
self.n_episodes += 1
self.total_episodes += 1
self.n_steps += episode_len
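With custom_metrics, a stopping condition can name a user-supplied metric: _parse_condition_string now checks self.metric_values before falling back to the built-in metric names. A hypothetical configuration; the two stage curricula are placeholders, the ">=" comparator is an assumption (only "<" is visible in this hunk), and how metric_values gets refreshed during training is not shown in this diff:

# Hypothetical metric, keyed by the same name used in the condition string.
def mean_success_rate(infos):
    # Placeholder: derive a success rate from recent environment infos.
    return sum(i.get("success", 0.0) for i in infos) / max(len(infos), 1)

curriculum = SequentialCurriculum(
    curriculum_list=[easy_curriculum, hard_curriculum],  # placeholder stages
    stopping_conditions=["mean_success_rate>=0.9"],      # resolved via metric_values
    custom_metrics={"mean_success_rate": mean_success_rate},
)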
15 changes: 4 additions & 11 deletions syllabus/curricula/simple_box.py
@@ -60,14 +60,7 @@ def sample(self, k: int = 1) -> Union[List, Any]:
"""
Sample k tasks from the curriculum.
"""
return [self.max_range for _ in range(k)]

def log_metrics(self, writer, step=None):
print("Logging", flush=True)
try:
import wandb
writer.log({"range_min": self.max_range[0]}, step=step)
writer.log({"range_max": self.max_range[1]}, step=step)

except ImportError:
pass
if self._should_use_startup_sampling():
return self._startup_sample(k)

return [self.max_range for _ in range(k)]
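Net effect across these files: every curriculum's sample() consults the shared warmup path first. A hypothetical end-to-end construction; the TaskSpace and SimpleBoxCurriculum signatures and the import path are assumptions, with only the warmup kwargs coming from this PR's Curriculum.__init__:

import numpy as np
from gymnasium.spaces import Box

from syllabus.task_space import TaskSpace
from syllabus.curricula import SimpleBoxCurriculum  # assumed import path

# Illustrative 1-D Box task space wrapped as a Syllabus TaskSpace.
task_space = TaskSpace(Box(low=np.array([0.0]), high=np.array([1.0])))

curriculum = SimpleBoxCurriculum(
    task_space,
    warmup_strategy="fix",  # evenly spaced grid over the Box
    warmup_samples=8,       # serve 8 warmup tasks before normal sampling
)
first_batch = curriculum.sample(k=4)  # drawn from the warmup grid first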