@cython.boundscheck(True)  # Activate bounds checking
@cython.wraparound(True)  # Activate negative indexing
def generate_grid(configuration_space: ConfigurationSpace,
                  num_steps_dict: Optional[Dict[str, int]] = None,
                  ) -> List[Configuration]:
    """
    Generate a grid of Configurations for a given ConfigurationSpace.

    Can be used, for example, for grid search. Conditional hyperparameters
    are added to a grid point only once the values of their parents make
    them active.

    Parameters
    ----------
    configuration_space: :class:`~ConfigSpace.configuration_space.ConfigurationSpace`
        The configuration space over which to create the grid. It knows the
        types of all hyperparameters.

    num_steps_dict: dict, optional
        Maps the name of a UniformFloatHyperparameter or
        UniformIntegerHyperparameter to the number of grid points along that
        hyperparameter. Points are evenly spaced between ``lower`` and
        ``upper`` (evenly spaced in log space if the hyperparameter is
        log-scaled). Numerical hyperparameters missing from the dict are
        stepped by their quantization factor ``q`` instead.

    Returns
    -------
    list
        List of Configurations. Top-level hyperparameters are combined as a
        cartesian product of their grid values; every point that activates
        conditional hyperparameters is expanded with the grid values of
        those hyperparameters. Within each product the ordering of
        hyperparameters follows the ordering inside the ConfigurationSpace.

    Raises
    ------
    ValueError
        If a numerical hyperparameter has neither an entry in
        ``num_steps_dict`` nor a quantization factor, if a requested number
        of steps is smaller than 1, or if creating a Configuration fails for
        a reason other than a missing active hyperparameter.
    TypeError
        If a hyperparameter has an unsupported type.
    RuntimeError
        If a Configuration reports a missing active hyperparameter but no
        newly activated hyperparameter can be found.
    """
    import itertools

    def get_numerical_grid(param, as_int):
        """
        Return the grid values of a uniform float/integer hyperparameter.

        Parameters
        ----------
        param: UniformFloatHyperparameter or UniformIntegerHyperparameter
            Hyperparameter to discretize

        as_int: bool
            Whether the grid values have to be integers

        Returns
        -------
        tuple
            Grid values in increasing order, all within [lower, upper]
        """
        if num_steps_dict is not None and param.name in num_steps_dict:
            num_steps = num_steps_dict[param.name]
            if num_steps < 1:
                raise ValueError("The number of grid points for " + param.name
                                 + " must be at least 1, got "
                                 + str(num_steps) + ".")
            if param.log:
                # Evenly spaced in log space, then mapped back.
                log_lower, log_upper = np.log([param.lower, param.upper])
                grid_points = np.exp(
                    np.linspace(log_lower, log_upper, num_steps))
            else:
                grid_points = np.linspace(param.lower, param.upper, num_steps)
        elif param.q is not None:
            # The quantization factor is defined on the actual values, so the
            # steps are taken in the original space even for log-scaled
            # hyperparameters.
            grid_points = np.arange(param.lower, param.upper + param.q,
                                    param.q)
            # The final step may overshoot the upper bound; pull it back.
            grid_points = np.minimum(grid_points, param.upper)
            # With a float step np.arange can emit one point too many, which
            # after clamping would duplicate the upper bound.
            if len(grid_points) > 1 and \
                    grid_points[-1] - grid_points[-2] <= 1e-9 * abs(param.q):
                grid_points = grid_points[:-1]
        else:
            raise ValueError("num_steps_dict is None or doesn't contain the number of points to divide " + param.name + " into. And its quantization factor is None. Please provide/set one of these values.")

        if as_int:
            # Round instead of truncating: exp(log(x)) may land marginally
            # below x, and truncation would then lose the bound itself.
            grid_points = np.round(grid_points).astype(int)

        # Avoiding rounding off issues at the bounds
        if grid_points[0] < param.lower:
            grid_points[0] = param.lower
        if grid_points[-1] > param.upper:
            grid_points[-1] = param.upper

        return tuple(grid_points)

    def get_value_set(hp_name: str):
        """
        Get the values along the grid for a particular hyperparameter.

        Parameters
        ----------
        hp_name: str
            Hyperparameter name

        Returns
        -------
        tuple
            Holds grid values for the given hyperparameter
        """
        param = configuration_space.get_hyperparameter(hp_name)
        if isinstance(param, CategoricalHyperparameter):
            return param.choices
        if isinstance(param, OrdinalHyperparameter):
            return param.sequence
        if isinstance(param, Constant):
            return (param.value, )
        if isinstance(param, UniformFloatHyperparameter):
            return get_numerical_grid(param, False)
        if isinstance(param, UniformIntegerHyperparameter):
            return get_numerical_grid(param, True)
        raise TypeError("Unknown hyperparameter type %s" % type(param))

    def get_cartesian_product(value_sets: List[Tuple], hp_names: List[str]):
        """
        Return the grid for a subspace spanned by the given hyperparameters.

        Parameters
        ----------
        value_sets: list of tuples
            One tuple of grid values per hyperparameter, in the same order
            as ``hp_names``

        hp_names: list of strs
            List of hyperparameter names

        Returns
        -------
        list of dicts
            List of configuration dicts; empty if there are no
            hyperparameters
        """
        if not value_sets:
            # itertools.product() of nothing yields one empty tuple, which
            # would turn into a single empty configuration.
            return []
        return [dict(zip(hp_names, element))
                for element in itertools.product(*value_sets)]

    # HP names and grid values for the HPs at the top level of the
    # ConfigSpace tree
    root_hp_names = list(
        configuration_space._children['__HPOlib_configuration_space_root__'])
    root_value_sets = [get_value_set(hp_name) for hp_name in root_hp_names]

    # The points are "unchecked" because some of them may activate
    # conditional HPs, in which case creating a Configuration fails until the
    # values of those HPs have been added (see below).
    unchecked_grid_pts = deque(
        get_cartesian_product(root_value_sets, root_hp_names))
    checked_grid_pts = []

    while unchecked_grid_pts:
        candidate = unchecked_grid_pts.popleft()
        try:
            grid_point = Configuration(configuration_space, candidate)
        except ValueError as e:
            message = str(e)
            if not (message.startswith("Active hyperparameter '")
                    and message.endswith("' not specified!")):
                # Not the "conditional HP became active" case: do not hide it.
                raise
        else:
            checked_grid_pts.append(grid_point)
            continue

        # The candidate activates at least one conditional HP. Keep the
        # values it already has fixed and expand it over the grid values of
        # the newly active HPs.
        hp_names = list(candidate)
        value_sets = [(candidate[hp_name], ) for hp_name in hp_names]
        new_active_hp_names = []

        for hp_name in list(candidate):
            # Check which children of the already active HPs are now active
            for child_name in configuration_space._children[hp_name]:
                if child_name in candidate or child_name in new_active_hp_names:
                    continue
                conditions = configuration_space._parent_conditions_of[child_name]
                if all(cond.evaluate(candidate) for cond in conditions):
                    new_active_hp_names.append(child_name)

        if not new_active_hp_names:
            raise RuntimeError("Unexpected error: There should have been a newly activated hyperparameter for the current configuration values: " + str(candidate) + ". Please contact the developers with the code you ran and the stack trace.")

        for hp_name in new_active_hp_names:
            value_sets.append(get_value_set(hp_name))
            hp_names.append(hp_name)
        # The expanded points are checked again later: they may in turn
        # activate HPs further down the tree.
        unchecked_grid_pts.extend(get_cartesian_product(value_sets, hp_names))

    return checked_grid_pts
def test_generate_grid(self):
    """Test grid generation for flat, degenerate and conditional spaces."""

    # Sub-test 1
    # One hyperparameter of every supported type, no conditions.
    cs = ConfigurationSpace(seed=1234)

    cat1 = CategoricalHyperparameter(name='cat1', choices=['T', 'F'])
    const1 = Constant(name='const1', value=4)
    float1 = UniformFloatHyperparameter(name='float1', lower=-1, upper=1, log=False)
    int1 = UniformIntegerHyperparameter(name='int1', lower=10, upper=100, log=True)
    ord1 = OrdinalHyperparameter(name='ord1', sequence=['1', '2', '3'])

    cs.add_hyperparameters([float1, int1, cat1, ord1, const1])

    num_steps_dict = {'float1': 11, 'int1': 6}
    generated_grid = generate_grid(cs, num_steps_dict)

    # Check randomly pre-selected values in the generated_grid
    # 2 * 1 * 11 * 6 * 3 total different possible configurations
    self.assertEqual(len(generated_grid), 396)
    # Check 1st and last generated configurations completely:
    first_expected_dict = {'cat1': 'T', 'const1': 4, 'float1': -1.0, 'int1': 10, 'ord1': '1'}
    last_expected_dict = {'cat1': 'F', 'const1': 4, 'float1': 1.0, 'int1': 100, 'ord1': '3'}
    self.assertEqual(generated_grid[0].get_dictionary(), first_expected_dict)
    self.assertEqual(generated_grid[-1].get_dictionary(), last_expected_dict)
    self.assertEqual(generated_grid[198].get_dictionary()['cat1'], 'F')
    self.assertEqual(generated_grid[45].get_dictionary()['const1'], 4)
    # The 2 most frequently changing HPs (int1 and ord1) have 3 * 6 = 18 different values for
    # each value of float1, so the 4th value of float1 of -0.4 is reached after
    # 3 * 18 = 54 values in the generated_grid (and remains the same for the next 18 values):
    for i in range(18):
        self.assertAlmostEqual(generated_grid[54+i].get_dictionary()['float1'], -0.4, places=2)
    # 5th different value for int1 after 4 * 3 = 12 values. Reasoning as above.
    self.assertEqual(generated_grid[12].get_dictionary()['int1'], 63)
    self.assertEqual(generated_grid[3].get_dictionary()['ord1'], '1')
    self.assertEqual(generated_grid[4].get_dictionary()['ord1'], '2')
    self.assertEqual(generated_grid[5].get_dictionary()['ord1'], '3')

    # Sub-test 2
    # Test for extreme cases: only numerical
    cs = ConfigurationSpace(seed=1234)
    cs.add_hyperparameters([float1, int1])

    num_steps_dict = {'float1': 11, 'int1': 6}
    generated_grid = generate_grid(cs, num_steps_dict)

    self.assertEqual(len(generated_grid), 66)
    # Check 1st and last generated configurations completely:
    first_expected_dict = {'float1': -1.0, 'int1': 10}
    last_expected_dict = {'float1': 1.0, 'int1': 100}
    self.assertEqual(generated_grid[0].get_dictionary(), first_expected_dict)
    self.assertEqual(generated_grid[-1].get_dictionary(), last_expected_dict)

    # Test: only categorical
    cs = ConfigurationSpace(seed=1234)
    cs.add_hyperparameters([cat1])

    generated_grid = generate_grid(cs)

    self.assertEqual(len(generated_grid), 2)
    # Check 1st and last generated configurations completely:
    self.assertEqual(generated_grid[0].get_dictionary()['cat1'], 'T')
    self.assertEqual(generated_grid[-1].get_dictionary()['cat1'], 'F')

    # Test: only constant
    cs = ConfigurationSpace(seed=1234)
    cs.add_hyperparameters([const1])

    generated_grid = generate_grid(cs)

    self.assertEqual(len(generated_grid), 1)
    # Check 1st and only generated configuration completely:
    self.assertEqual(generated_grid[0].get_dictionary()['const1'], 4)

    # Test: no hyperparameters yet
    cs = ConfigurationSpace(seed=1234)

    generated_grid = generate_grid(cs, num_steps_dict)

    # itertools.product() of no value sets would yield a single empty tuple (i.e. a single
    # empty Configuration); get_cartesian_product treats this as an edge case and returns
    # an empty grid instead.
    self.assertEqual(len(generated_grid), 0)

    # Sub-test 3
    # Tests for quantization and conditional spaces. num_steps_dict supports specifying steps
    # for only some of the int and float HPs. The rest are taken from the 'q' member variables
    # of these HPs. The conditional space tested has 2 levels of conditions.
    cs2 = ConfigurationSpace(seed=123)
    float1 = UniformFloatHyperparameter(name='float1', lower=-1, upper=1, log=False)
    int1 = UniformIntegerHyperparameter(name='int1', lower=0, upper=1000, log=False, q=500)
    cs2.add_hyperparameters([float1, int1])

    int2_cond = UniformIntegerHyperparameter(name='int2_cond', lower=10, upper=100, log=True)
    cs2.add_hyperparameters([int2_cond])
    cond_1 = AndConjunction(LessThanCondition(int2_cond, float1, -0.5),
                            GreaterThanCondition(int2_cond, int1, 600))
    cs2.add_conditions([cond_1])
    cat1_cond = CategoricalHyperparameter(name='cat1_cond', choices=['apple', 'orange'])
    cs2.add_hyperparameters([cat1_cond])
    cond_2 = AndConjunction(GreaterThanCondition(cat1_cond, int1, 300),
                            LessThanCondition(cat1_cond, int1, 700),
                            GreaterThanCondition(cat1_cond, float1, -0.5),
                            LessThanCondition(cat1_cond, float1, 0.5)
                            )
    cs2.add_conditions([cond_2])
    float2_cond = UniformFloatHyperparameter(name='float2_cond',
                                             lower=10., upper=100., log=True)
    # 2nd level dependency in ConfigurationSpace tree being tested
    cs2.add_hyperparameters([float2_cond])
    cond_3 = GreaterThanCondition(float2_cond, int2_cond, 50)
    cs2.add_conditions([cond_3])
    num_steps_dict1 = {'float1': 4, 'int2_cond': 3, 'float2_cond': 3}
    generated_grid = generate_grid(cs2, num_steps_dict1)
    self.assertEqual(len(generated_grid), 18)

    # RR: I manually generated the grid and verified the values were correct.
    # Check 1st and last generated configurations completely:
    first_expected_dict = {'float1': -1.0, 'int1': 0}
    last_expected_dict = {'float1': -1.0, 'int1': 1000, 'int2_cond': 100, 'float2_cond': 100.0}
    self.assertEqual(generated_grid[0].get_dictionary(), first_expected_dict)
    self.assertEqual(generated_grid[-1].get_dictionary(), last_expected_dict)
    # Here, we test that a few randomly chosen values in the generated grid
    # correspond to the ones I checked.
    self.assertEqual(generated_grid[3].get_dictionary()['int1'], 1000)
    self.assertEqual(generated_grid[12].get_dictionary()['cat1_cond'], 'orange')
    self.assertAlmostEqual(generated_grid[-2].get_dictionary()['float2_cond'],
                           31.622776601683803, places=3)

    # Sub-test 4
    # Test: only a single hyperparameter and num_steps_dict is None
    cs = ConfigurationSpace(seed=1234)
    cs.add_hyperparameters([float1])

    num_steps_dict = {'float1': 11}
    # Without a step count and without a quantization factor the grid is undefined.
    # assertRaises makes the test fail if no exception is raised at all.
    with self.assertRaises(ValueError) as raised:
        generate_grid(cs)
    self.assertEqual(str(raised.exception),
                     "num_steps_dict is None or doesn't contain "
                     "the number of points to divide float1 into. And its quantization "
                     "factor is None. Please provide/set one of these values.")

    generated_grid = generate_grid(cs, num_steps_dict)

    self.assertEqual(len(generated_grid), 11)
    # Check 1st and last generated configurations completely:
    self.assertEqual(generated_grid[0].get_dictionary()['float1'], -1.0)
    self.assertEqual(generated_grid[-1].get_dictionary()['float1'], 1.0)