Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,9 @@ coverage.xml
*,cover
.hypothesis/
prof/

# VSCode
.vscode

# Running pre-commit seems to generate these
.mypy_cache
192 changes: 191 additions & 1 deletion ConfigSpace/util.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@

from collections import deque
import copy
from typing import Union, Dict, Generator
from typing import Union, Dict, Generator, List, Tuple, Optional

import numpy as np # type: ignore
from ConfigSpace import Configuration, ConfigurationSpace
Expand All @@ -39,6 +39,7 @@ from ConfigSpace.hyperparameters import CategoricalHyperparameter, \
UniformFloatHyperparameter, UniformIntegerHyperparameter, Constant, \
OrdinalHyperparameter, NumericalHyperparameter
import ConfigSpace.c_util
cimport cython


def impute_inactive_values(configuration: Configuration, strategy: Union[str, float] = 'default') -> Configuration:
Expand Down Expand Up @@ -436,3 +437,192 @@ def fix_types(configuration: dict,
else:
raise TypeError("Unknown hyperparameter type %s" % type(param))
return configuration

@cython.boundscheck(True)  # Activate bounds checking
@cython.wraparound(True)  # Activate negative indexing
def generate_grid(configuration_space: ConfigurationSpace,
                  num_steps_dict: Optional[Dict[str, int]] = None,
                  ) -> List[Configuration]:
    """
    Generates a grid of Configurations for a given ConfigurationSpace.
    Can be used, for example, for grid search.

    Parameters
    ----------
    configuration_space: :class:`~ConfigSpace.configuration_space.ConfigurationSpace`
        The Configuration space over which to create a grid of HyperParameter
        Configuration values. It knows the types for all parameter values.

    num_steps_dict: dict
        A dict containing the number of points to divide the grid side formed by
        Hyperparameters which are either of type UniformFloatHyperparameter or
        type UniformIntegerHyperparameter. The keys in the dict should be the
        names of the corresponding Hyperparameters and the values should be the
        number of points to divide the grid side formed by the corresponding
        Hyperparameter in to.

    Returns
    -------
    list
        List containing Configurations. It is a cartesian product of tuples of
        HyperParameter values. Each tuple lists the possible values taken by the
        corresponding HyperParameter. Within the cartesian product, in each
        element, the ordering of HyperParameters is the same for the OrderedDict
        within the ConfigurationSpace.

    Raises
    ------
    ValueError
        If a numerical hyperparameter has neither an entry in ``num_steps_dict``
        nor a quantization factor ``q``, or if creating a Configuration fails for
        a reason other than a not yet specified active hyperparameter.
    TypeError
        If a hyperparameter of an unsupported type is encountered.
    RuntimeError
        If a grid point is reported as incomplete but no conditional
        hyperparameter could be activated for it.
    """

    def get_numerical_value_set(param):
        '''
        Gets values along the grid for a UniformFloatHyperparameter or a
        UniformIntegerHyperparameter.

        Parameters
        ----------
        param: UniformFloatHyperparameter or UniformIntegerHyperparameter
            The numerical hyperparameter to discretize

        Returns
        -------
        tuple
            Holds grid values for the given hyperparameter

        '''
        if num_steps_dict is not None and param.name in num_steps_dict:
            num_steps = num_steps_dict[param.name]
            if param.log:
                # Space the points evenly in log space, then map them back.
                lower, upper = np.log([param.lower, param.upper])
                grid_points = np.exp(np.linspace(lower, upper, num_steps))
            else:
                grid_points = np.linspace(param.lower, param.upper, num_steps)
        elif param.q is not None:
            # q is a step size in the original (linear) value space, so the
            # quantized grid is built there even for log-scaled hyperparameters.
            grid_points = np.arange(param.lower, param.upper + param.q, param.q)
        else:
            raise ValueError("num_steps_dict is None or doesn't contain the number of points to divide " + param.name + " into. And its quantization factor is None. Please provide/set one of these values.")

        if isinstance(param, UniformIntegerHyperparameter):
            # Round to the nearest integer. Plain truncation would bias every
            # point downwards and could drop the upper bound when the log/exp
            # round trip lands marginally below it (e.g. 999.9999 -> 999).
            grid_points = np.round(grid_points).astype(int)

        # Avoiding rounding off issues
        if grid_points[0] < param.lower:
            grid_points[0] = param.lower
        if grid_points[-1] > param.upper:
            grid_points[-1] = param.upper

        return tuple(grid_points)

    def get_value_set(num_steps_dict: Optional[Dict[str, int]], hp_name: str):
        '''
        Gets values along the grid for a particular hyperparameter.

        Uses the num_steps_dict to determine number of grid values for
        UniformFloatHyperparameter and UniformIntegerHyperparameter. If these
        values are not present in num_steps_dict, the quantization factor, q, of
        these classes will be used to divide the grid. NOTE: If q has to be used
        but is None, a ValueError is raised.

        Parameters
        ----------
        num_steps_dict: dict
            Same description as for generate_grid()

        hp_name: str
            Hyperparameter name

        Returns
        -------
        tuple
            Holds grid values for the given hyperparameter

        '''
        param = configuration_space.get_hyperparameter(hp_name)
        if isinstance(param, CategoricalHyperparameter):
            return param.choices

        if isinstance(param, OrdinalHyperparameter):
            return param.sequence

        if isinstance(param, Constant):
            return (param.value, )

        if isinstance(param, (UniformFloatHyperparameter, UniformIntegerHyperparameter)):
            return get_numerical_value_set(param)

        raise TypeError("Unknown hyperparameter type %s" % type(param))

    def get_cartesian_product(value_sets: List[Tuple], hp_names: List[str]):
        '''
        Returns a grid for a subspace of the configuration with given
        hyperparameters and their grid values.

        Takes a list of tuples of grid values of the hyperparameters and list of
        hyperparameter names. The outer list iterates over the hyperparameters
        corresponding to the order in the list of hyperparameter names. The inner
        tuples contain grid values of the hyperparameters for each hyperparameter.

        Parameters
        ----------
        value_sets: list of tuples
            Same description as return value of get_value_set()

        hp_names: list of strs
            List of hyperparameter names

        Returns
        -------
        list of dicts
            List of configuration dicts

        '''
        import itertools

        # Edge case: itertools.product() without arguments yields one empty
        # tuple, which would turn into a single empty configuration dict.
        if not value_sets:
            return []

        return [dict(zip(hp_names, element))
                for element in itertools.product(*value_sets)]

    # Get HP names and allowed grid values they can take for the HPs at the top
    # level of ConfigSpace tree
    hp_names = list(configuration_space._children['__HPOlib_configuration_space_root__'])
    # list of tuples: each tuple within is the grid values to be taken on by a
    # Hyperparameter
    value_sets = [get_value_set(num_steps_dict, hp_name) for hp_name in hp_names]

    # Create a Cartesian product of above allowed values for the HPs. Hold them in
    # an "unchecked" deque because some of the conditionally dependent HPs may
    # become active for some of the elements of the Cartesian product and in these
    # cases creating a Configuration would throw an Error (see below).
    unchecked_grid_pts = deque(get_cartesian_product(value_sets, hp_names))  # deque of Configuration dicts
    checked_grid_pts = []

    while unchecked_grid_pts:
        candidate = unchecked_grid_pts.popleft()
        try:
            grid_point = Configuration(configuration_space, candidate)
        except ValueError as e:
            message = str(e)
            # Only a missing active hyperparameter is expected here. Anything
            # else is a genuine error and is passed on to the caller unchanged.
            if not (message.startswith("Active hyperparameter '")
                    and message.endswith("' not specified!")):
                raise
        else:
            checked_grid_pts.append(grid_point)
            continue

        # The candidate is incomplete: keep its current values fixed and extend
        # it with the grid values of every newly activated conditional HP.
        hp_names = list(candidate)
        value_sets = [(candidate[hp_name], ) for hp_name in hp_names]
        new_active_hp_names = []

        for hp_name in candidate:  # loop over currently active HP names
            # Checks if the conditionally dependent children of already active
            # HPs are now active
            for new_hp_name in configuration_space._children[hp_name]:
                if new_hp_name in candidate or new_hp_name in new_active_hp_names:
                    continue
                parent_conditions = configuration_space._parent_conditions_of[new_hp_name]
                if all(cond.evaluate(candidate) for cond in parent_conditions):
                    new_active_hp_names.append(new_hp_name)

        if not new_active_hp_names:
            raise RuntimeError("Unexpected error: There should have been a newly activated hyperparameter for the current configuration values: " + str(candidate) + ". Please contact the developers with the code you ran and the stack trace.")

        for hp_name in new_active_hp_names:
            value_sets.append(get_value_set(num_steps_dict, hp_name))
            hp_names.append(hp_name)

        # The extended points go back into the queue: they may in turn activate
        # further (deeper) conditional HPs.
        unchecked_grid_pts.extend(get_cartesian_product(value_sets, hp_names))

    return checked_grid_pts
152 changes: 150 additions & 2 deletions test/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,10 @@

from ConfigSpace import Configuration, ConfigurationSpace, UniformIntegerHyperparameter, \
UniformFloatHyperparameter, CategoricalHyperparameter, Constant, OrdinalHyperparameter, \
EqualsCondition, AndConjunction, OrConjunction
EqualsCondition, AndConjunction, OrConjunction, LessThanCondition, GreaterThanCondition
from ConfigSpace.read_and_write.pcs import read
from ConfigSpace.util import impute_inactive_values, get_random_neighbor, \
get_one_exchange_neighbourhood, deactivate_inactive_hyperparameters, fix_types
get_one_exchange_neighbourhood, deactivate_inactive_hyperparameters, fix_types, generate_grid
import ConfigSpace.c_util


Expand Down Expand Up @@ -342,3 +342,151 @@ def test_fix_types(self):
c = cs.get_default_configuration().get_dictionary()
c_str = {k: str(v) for k, v in c.items()}
self.assertEqual(fix_types(c_str, cs), c)

def test_generate_grid(self):
    """Test grid generation for flat, degenerate and conditional spaces."""

    # Sub-test 1
    cs = ConfigurationSpace(seed=1234)

    cat1 = CategoricalHyperparameter(name='cat1', choices=['T', 'F'])
    const1 = Constant(name='const1', value=4)
    float1 = UniformFloatHyperparameter(name='float1', lower=-1, upper=1, log=False)
    int1 = UniformIntegerHyperparameter(name='int1', lower=10, upper=100, log=True)
    ord1 = OrdinalHyperparameter(name='ord1', sequence=['1', '2', '3'])

    cs.add_hyperparameters([float1, int1, cat1, ord1, const1])

    num_steps_dict = {'float1': 11, 'int1': 6}
    generated_grid = generate_grid(cs, num_steps_dict)

    # Check randomly pre-selected values in the generated_grid
    # 2 * 1 * 11 * 6 * 3 total diff. possible configurations
    self.assertEqual(len(generated_grid), 396)
    # Check 1st and last generated configurations completely:
    first_expected_dict = {'cat1': 'T', 'const1': 4, 'float1': -1.0, 'int1': 10, 'ord1': '1'}
    last_expected_dict = {'cat1': 'F', 'const1': 4, 'float1': 1.0, 'int1': 100, 'ord1': '3'}
    self.assertEqual(generated_grid[0].get_dictionary(), first_expected_dict)
    self.assertEqual(generated_grid[-1].get_dictionary(), last_expected_dict)
    self.assertEqual(generated_grid[198].get_dictionary()['cat1'], 'F')
    self.assertEqual(generated_grid[45].get_dictionary()['const1'], 4)
    # The 2 most frequently changing HPs (int1 and ord1) have 3 * 6 = 18 different values for
    # each value of float1, so the 4th value of float1 of -0.4 is reached after
    # 3 * 18 = 54 values in the generated_grid (and remains the same for the next 18 values):
    for i in range(18):
        self.assertAlmostEqual(generated_grid[54+i].get_dictionary()['float1'], -0.4, places=2)
    # 5th diff. value for int1 after 4 * 3 = 12 values. Reasoning as above.
    self.assertEqual(generated_grid[12].get_dictionary()['int1'], 63)
    self.assertEqual(generated_grid[3].get_dictionary()['ord1'], '1')
    self.assertEqual(generated_grid[4].get_dictionary()['ord1'], '2')
    self.assertEqual(generated_grid[5].get_dictionary()['ord1'], '3')

    # Sub-test 2
    # Test for extreme cases: only numerical
    cs = ConfigurationSpace(seed=1234)
    cs.add_hyperparameters([float1, int1])

    num_steps_dict = {'float1': 11, 'int1': 6}
    generated_grid = generate_grid(cs, num_steps_dict)

    self.assertEqual(len(generated_grid), 66)
    # Check 1st and last generated configurations completely:
    first_expected_dict = {'float1': -1.0, 'int1': 10}
    last_expected_dict = {'float1': 1.0, 'int1': 100}
    self.assertEqual(generated_grid[0].get_dictionary(), first_expected_dict)
    self.assertEqual(generated_grid[-1].get_dictionary(), last_expected_dict)

    # Test: only categorical
    cs = ConfigurationSpace(seed=1234)
    cs.add_hyperparameters([cat1])

    generated_grid = generate_grid(cs)

    self.assertEqual(len(generated_grid), 2)
    # Check 1st and last generated configurations completely:
    self.assertEqual(generated_grid[0].get_dictionary()['cat1'], 'T')
    self.assertEqual(generated_grid[-1].get_dictionary()['cat1'], 'F')

    # Test: only constant
    cs = ConfigurationSpace(seed=1234)
    cs.add_hyperparameters([const1])

    generated_grid = generate_grid(cs)

    self.assertEqual(len(generated_grid), 1)
    # Check 1st and only generated configuration completely:
    self.assertEqual(generated_grid[0].get_dictionary()['const1'], 4)

    # Test: no hyperparameters yet
    cs = ConfigurationSpace(seed=1234)

    generated_grid = generate_grid(cs, num_steps_dict)

    # For the case of no hyperparameters, itertools.product() called without
    # arguments yields a single empty tuple, which would lead to a single empty
    # Configuration. It was agreed in review that generate_grid must guard
    # against this and return no configuration at all.
    self.assertEqual(len(generated_grid), 0)

    # Sub-test 3
    # Tests for quantization and conditional spaces. num_steps_dict supports specifying steps
    # for only some of the int and float HPs. The rest are taken from the 'q' member variables
    # of these HPs. The conditional space tested has 2 levels of conditions.
    cs2 = ConfigurationSpace(seed=123)
    float1 = UniformFloatHyperparameter(name='float1', lower=-1, upper=1, log=False)
    int1 = UniformIntegerHyperparameter(name='int1', lower=0, upper=1000, log=False, q=500)
    cs2.add_hyperparameters([float1, int1])

    int2_cond = UniformIntegerHyperparameter(name='int2_cond', lower=10, upper=100, log=True)
    cs2.add_hyperparameters([int2_cond])
    cond_1 = AndConjunction(LessThanCondition(int2_cond, float1, -0.5),
                            GreaterThanCondition(int2_cond, int1, 600))
    cs2.add_conditions([cond_1])
    cat1_cond = CategoricalHyperparameter(name='cat1_cond', choices=['apple', 'orange'])
    cs2.add_hyperparameters([cat1_cond])
    cond_2 = AndConjunction(GreaterThanCondition(cat1_cond, int1, 300),
                            LessThanCondition(cat1_cond, int1, 700),
                            GreaterThanCondition(cat1_cond, float1, -0.5),
                            LessThanCondition(cat1_cond, float1, 0.5)
                            )
    cs2.add_conditions([cond_2])
    float2_cond = UniformFloatHyperparameter(name='float2_cond',
                                             lower=10., upper=100., log=True)
    # 2nd level dependency in ConfigurationSpace tree being tested
    cs2.add_hyperparameters([float2_cond])
    cond_3 = GreaterThanCondition(float2_cond, int2_cond, 50)
    cs2.add_conditions([cond_3])
    num_steps_dict1 = {'float1': 4, 'int2_cond': 3, 'float2_cond': 3}
    generated_grid = generate_grid(cs2, num_steps_dict1)
    self.assertEqual(len(generated_grid), 18)

    # RR: I manually generated the grid and verified the values were correct.
    # Check 1st and last generated configurations completely:
    first_expected_dict = {'float1': -1.0, 'int1': 0}
    last_expected_dict = {'float1': -1.0, 'int1': 1000, 'int2_cond': 100, 'float2_cond': 100.0}
    self.assertEqual(generated_grid[0].get_dictionary(), first_expected_dict)
    self.assertEqual(generated_grid[-1].get_dictionary(), last_expected_dict)
    # Here, we test that a few randomly chosen values in the generated grid
    # correspond to the ones I checked.
    self.assertEqual(generated_grid[3].get_dictionary()['int1'], 1000)
    self.assertEqual(generated_grid[12].get_dictionary()['cat1_cond'], 'orange')
    self.assertAlmostEqual(generated_grid[-2].get_dictionary()['float2_cond'],
                           31.622776601683803, places=3)

    # Sub-test 4
    # Test: only a single hyperparameter and num_steps_dict is None
    cs = ConfigurationSpace(seed=1234)
    cs.add_hyperparameters([float1])

    num_steps_dict = {'float1': 11}
    # float1 has no quantization factor, so omitting num_steps_dict must fail.
    # assertRaises makes the test fail if the error is NOT raised, which a bare
    # try/except would silently let through.
    with self.assertRaises(ValueError) as raised:
        generate_grid(cs)
    self.assertEqual(
        str(raised.exception),
        "num_steps_dict is None or doesn't contain "
        "the number of points to divide float1 into. And its quantization "
        "factor is None. Please provide/set one of these values."
    )

    generated_grid = generate_grid(cs, num_steps_dict)

    self.assertEqual(len(generated_grid), 11)
    # Check 1st and last generated configurations completely:
    self.assertEqual(generated_grid[0].get_dictionary()['float1'], -1.0)
    self.assertEqual(generated_grid[-1].get_dictionary()['float1'], 1.0)