Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions databaker/jupybakeutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def svalue(cell):

class HDim:
"Dimension object which defines the lookup between an observation cell and a bag of header cells"
def __init__(self, hbagset, label, engine=None, direction=None, cellvalueoverride=None):
def __init__(self, hbagset, label, engine=None, direction=None, cellvalueoverride=None, apply=None):
self.label = label
self.name = label
self.hbagset = hbagset
Expand All @@ -46,11 +46,11 @@ def __init__(self, hbagset, label, engine=None, direction=None, cellvalueoverrid
# TODO: better, must be a cleaner way.
if isinstance(engine, WITHIN):
starting_offset, ending_offset, direction_of_travel = engine.unpack()
self.engine = WithinEngine(hbagset, direction, label, starting_offset, ending_offset, direction_of_travel, self.cellvalueoverride)
self.engine = WithinEngine(hbagset, direction, label, starting_offset, ending_offset, direction_of_travel, self.cellvalueoverride, apply)
elif engine.__name__ is DirectlyEngine.__name__:
self.engine = DirectlyEngine(hbagset, direction, label, self.cellvalueoverride)
self.engine = DirectlyEngine(hbagset, direction, label, self.cellvalueoverride, apply)
elif engine.__name__ is ClosestEngine.__name__:
self.engine = ClosestEngine(hbagset, direction, label, self.cellvalueoverride)
self.engine = ClosestEngine(hbagset, direction, label, self.cellvalueoverride, apply)
elif engine.__name__ is ConstantEngine.__name__:
self.engine = ConstantEngine(self.cellvalueoverride)
else:
Expand Down
8 changes: 5 additions & 3 deletions databaker/lookupengines/closest.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import json

from databaker.constants import ABOVE, BELOW, LEFT, RIGHT, DIRECTION_DICT
from databaker.lookupengines.generic import override_looked_up_cell
from databaker.lookupengines.generic import override_looked_up_cell, unpack_callables

class BoundaryError(Exception):
""" Raised when attempting to lookup outside the bounds of where a lookup can exist"""
Expand All @@ -10,7 +10,7 @@ def __init__(self, message):

class ClosestEngine(object):

def __init__(self, cell_bag, direction, label, cellvalueoverride):
def __init__(self, cell_bag, direction, label, cellvalueoverride, apply):
"""
Creates a lookup engine for dimensions defined with the CLOSEST relationship.

Expand Down Expand Up @@ -48,6 +48,7 @@ def __init__(self, cell_bag, direction, label, cellvalueoverride):
self.direction = direction
self.label = label
self.cellvalueoverride = cellvalueoverride if cellvalueoverride is not None else {}
self.apply_functions = unpack_callables(apply)

assert len(cell_bag) > 0, f'Aborting. The dimension {self.label} is defined as CLOSEST ' \
+ f'{DIRECTION_DICT[self.direction]} but an empty selection of cells has been ' \
Expand Down Expand Up @@ -237,5 +238,6 @@ def lookup(self, cell, index=None, ceiling=None, floor=0):

# Apply str level cell value override if applicable

cell, cell_value = override_looked_up_cell(r["dimension_cell"], self.cellvalueoverride)
cell, cell_value = override_looked_up_cell(r["dimension_cell"], self.cellvalueoverride,
self.apply_functions)
return cell, cell_value
8 changes: 5 additions & 3 deletions databaker/lookupengines/directly.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from databaker.constants import ABOVE, BELOW, LEFT, RIGHT, DIRECTION_DICT
from databaker.lookupengines.generic import override_looked_up_cell
from databaker.lookupengines.generic import override_looked_up_cell, unpack_callables

class DirectLookupException(Exception):
"""Raised when a DIRECT lookup fails"""
Expand All @@ -9,7 +9,7 @@ def __init__(self, message):

class DirectlyEngine(object):

def __init__(self, cell_bag, direction, label, cellvalueoverride):
def __init__(self, cell_bag, direction, label, cellvalueoverride, apply):
"""
We're going to write the cell_bag into a tiered dictionary (you
could use a flat dictionary, but this'll be quicker).
Expand Down Expand Up @@ -38,6 +38,7 @@ def __init__(self, cell_bag, direction, label, cellvalueoverride):
self.direction = direction
self.label = label
self.cellvalueoverride = cellvalueoverride if cellvalueoverride is not None else {}
self.apply_functions = unpack_callables(apply)

self.tiered_dict = {}

Expand Down Expand Up @@ -141,7 +142,8 @@ def lookup(self, cell):
" x:{}, y{}.".format(self.direction,cell.x, cell.y))

if self.last_cell_found is not None:
cell, cell_value = override_looked_up_cell(self.last_cell_found, self.cellvalueoverride)
cell, cell_value = override_looked_up_cell(self.last_cell_found, self.cellvalueoverride,
self.apply_functions)
return cell, cell_value

# If we fall through to here the lookup has failed, raise an exception
Expand Down
38 changes: 35 additions & 3 deletions databaker/lookupengines/generic.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging

def override_looked_up_cell(found_cell, cellvalueoverride):
def override_looked_up_cell(found_cell, cellvalueoverride, apply_functions):
str_override_applied = False
cell_override_applied = False

Expand All @@ -22,5 +22,37 @@ def override_looked_up_cell(found_cell, cellvalueoverride):
logging.warning(f'''Both a str (cellvalueoverride) and cell level (AddCellValueOverride)
override is being applied to cell {found_cell}. This should rarely or never be necessary
and can lead to confusing and difficult to debug behaviour.''')

return found_cell, value

for a_func in apply_functions:
if str_override_applied or cell_override_applied:
logging.warning(f'''You are applying (via apply=) a function to the dimension
header {found_cell} that has already had a cell value override applied to it.
The cell.value at time this function was applied was {value}.''')
value = a_func(value)

return found_cell, value

def unpack_callables(callable_thing_or_tuple_of):
    """
    Normalise whatever was passed to the HDim apply= keyword into a tuple of callables.

    :param callable_thing_or_tuple_of: None (or any other falsy value), a single
        callable (function, lambda, object with a __call__ method), or a tuple
        of such callables.
    :return: a tuple of callables; the empty tuple when nothing was passed in.
        A tuple argument is returned as-is once every member has been checked.
    :raises AssertionError: if the argument, or any member of a tuple argument,
        is not callable.
    """
    if not callable_thing_or_tuple_of:
        return ()

    msg = '{} is being passed in via apply= but does not appear to be callable or tuple of callables'

    # Wrap a lone callable so the caller can always iterate over the result.
    if isinstance(callable_thing_or_tuple_of, tuple):
        candidates = callable_thing_or_tuple_of
    else:
        candidates = (callable_thing_or_tuple_of,)

    for candidate in candidates:
        # Raised explicitly instead of via `assert` so the check still runs
        # under `python -O`; AssertionError is kept so existing callers that
        # catch it keep working.
        if not callable(candidate):
            raise AssertionError(msg.format(candidate))

    return candidates
8 changes: 4 additions & 4 deletions databaker/lookupengines/within.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@

from databaker.constants import ABOVE, BELOW, UP, DOWN, LEFT, RIGHT, DIRECTION_DICT
from databaker.lookupengines.generic import override_looked_up_cell
from databaker.lookupengines.generic import override_looked_up_cell, unpack_callables

# Essentially a factory function for the actual WithinEngine
# I don't particularly like breaking the python convention of UPPERCLASS == a constant
Expand Down Expand Up @@ -88,8 +88,7 @@ def unpack(self):

class WithinEngine(object):

def __init__(self, cell_bag, direction, label, starting_offset, ending_offset, direction_of_travel, cellvalueoverride):
self.cellvalueoverride = cellvalueoverride
def __init__(self, cell_bag, direction, label, starting_offset, ending_offset, direction_of_travel, cellvalueoverride, apply):
"""
Creates a lookup engine to resolve a WITHIN(<a given range of offsets>) lookup.

Expand Down Expand Up @@ -131,6 +130,7 @@ def __init__(self, cell_bag, direction, label, starting_offset, ending_offset, d
self.direction_of_travel = direction_of_travel
self.cellvalueoverride = cellvalueoverride
self.cell_bag = cell_bag
self.apply_functions = unpack_callables(apply)

self.sequence = self._sequencer(cell_bag) # see docstring

Expand Down Expand Up @@ -332,5 +332,5 @@ def lookup(self, cell):
raise ValueError(f'Unsuccessful within lookup for cell {cell} in dimension "{self.label}". Direction was {DIRECTION_DICT[self.direction]}'
f' and we were scanning {DIRECTION_DICT[self.direction_of_travel]} but no header cell was found in the specified range.')

cell, cell_value = override_looked_up_cell(found_cell, self.cellvalueoverride)
cell, cell_value = override_looked_up_cell(found_cell, self.cellvalueoverride, self.apply_functions)
return cell, cell_value
82 changes: 82 additions & 0 deletions features/applycallable.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
Feature: Apply callables passed in at runtime to a dimension constructor
As a data engineer. When I construct a databaker dimension, I want to be able to pass in a function, lambda or
other callable that acts upon any values returned by a lookup to a dimension.

Scenario Outline: Apply a callable to a dimension constructor for a CLOSEST engine
Given we load a file named <File Name>
And select the sheet "Sheet1"
And we define cell selections as
| key | value |
| month | tab.excel_ref("B6:B25").is_not_blank() |
| observations | tab.excel_ref("C6:I25") |
And we define the dimensions as
"""
HDim(month, "Month", CLOSEST, ABOVE, apply=lambda x: f"I am the month: {x}")
"""
Then the lookup from an observation in cell "C8" to the dimension "Month" returns the value "I am the month: Jan"
And the lookup from an observation in cell "C13" to the dimension "Month" returns the value "I am the month: Apr"
And the lookup from an observation in cell "C18" to the dimension "Month" returns the value "I am the month: Jul"
And the lookup from an observation in cell "C23" to the dimension "Month" returns the value "I am the month: Oct"

Examples: File Types
| File Name |
| "bakingtestdataset.xls" |
| "bakingtestdataset.xlsx" |

Scenario Outline: Apply a callable to a dimension constructor for a DIRECT engine
Given we load a file named <File Name>
And select the sheet "Sheet1"
And we define cell selections as
| key | value |
| top_dims | tab.excel_ref("D5:I5") |
| observations | tab.excel_ref("C6:I25") |
And we define the dimensions as
"""
HDim(top_dims, "Top Dims", DIRECTLY, ABOVE, apply=lambda x: f'{x} got this text added.')
"""
Then the lookup from an observation in cell "D8" to the dimension "Top Dims" returns the value "Dim 1 got this text added."
And the lookup from an observation in cell "E8" to the dimension "Top Dims" returns the value "Dim 2 got this text added."
And the lookup from an observation in cell "F8" to the dimension "Top Dims" returns the value "Dim 3 got this text added."
And the lookup from an observation in cell "G8" to the dimension "Top Dims" returns the value "Dim 4 got this text added."

Examples: File Types
| File Name |
| "bakingtestdataset.xls" |
| "bakingtestdataset.xlsx" |

Scenario: Apply a callable to a dimension constructor for a WITHIN engine
Given we load a file named "bakingtestdataset.xls"
And select the sheet "Sheet1"
And we define cell selections as
| key | value |
| cats_and_dogs | tab.excel_ref("3").is_not_blank() |
| observations | tab.excel_ref("C6:I25") |
And we define the dimensions as
"""
HDim(cats_and_dogs, "Cats And Dogs", WITHIN(right=2, left=1), ABOVE, apply=lambda x: f'I love {x} a whole bunch')
"""
Then the lookup from an observation in cell "C25" to the dimension "Cats And Dogs" returns the value "I love Cats a whole bunch"
And the lookup from an observation in cell "F25" to the dimension "Cats And Dogs" returns the value "I love Cats a whole bunch"
And the lookup from an observation in cell "G25" to the dimension "Cats And Dogs" returns the value "I love Dogs a whole bunch"
And the lookup from an observation in cell "I25" to the dimension "Cats And Dogs" returns the value "I love Dogs a whole bunch"


Scenario Outline: Apply multiple ordered callables to a dimension constructor for a DIRECT engine
Given we load a file named <File Name>
And select the sheet "Sheet1"
And we define cell selections as
| key | value |
| top_dims | tab.excel_ref("D5:I5") |
| observations | tab.excel_ref("C6:I25") |
And we define the dimensions as
"""
HDim(top_dims, "Top Dims", DIRECTLY, ABOVE, apply=(lambda x: f'{x} got this text added.', lambda x: x.replace(' got this', '')))
"""
Then the lookup from an observation in cell "D8" to the dimension "Top Dims" returns the value "Dim 1 text added."
And the lookup from an observation in cell "E8" to the dimension "Top Dims" returns the value "Dim 2 text added."
And the lookup from an observation in cell "F8" to the dimension "Top Dims" returns the value "Dim 3 text added."
And the lookup from an observation in cell "G8" to the dimension "Top Dims" returns the value "Dim 4 text added."

Examples: File Types
| File Name |
| "bakingtestdataset.xls" |
| "bakingtestdataset.xlsx" |
22 changes: 22 additions & 0 deletions features/steps/load_xls.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,11 +327,33 @@ def step_impl(context, ob_cell_excel_ref, dimension_name, expecting):
looked_up_cell, _ = dimension.celllookup(ob_cell)
assert str(looked_up_cell) == expecting, f'Got {str(looked_up_cell)}, expected {expecting}'

@then('the lookup from an observation in cell "{ob_cell_excel_ref}" to the dimension "{dimension_name}" returns the value "{expecting}"')
def step_impl(context, ob_cell_excel_ref, dimension_name, expecting):
    """
    Look up the named dimension from the observation cell at the given excel
    reference and check the looked-up value against the expected text.
    """
    # Exactly one dimension on the context must carry the requested name.
    named_dimensions = [dim for dim in context.dimensions if dim.name == dimension_name]
    assert len(named_dimensions) == 1, f'Could not find a dimension named {dimension_name}'
    dimension = named_dimensions[0]

    candidate_selections = [
        context.selections[key] for key in context.selections if key == "observations"
    ]
    observation_selection = candidate_selections[0]

    # First selected observation cell whose excel location matches the reference.
    not_found = object()
    ob_cell = next(
        (
            candidate
            for candidate in observation_selection
            if xypath.contrib.excel.excel_location(candidate) == ob_cell_excel_ref
        ),
        not_found,
    )
    if ob_cell is not_found:
        raise ValueError(f'Could not find a selected observation cell for excel reference {ob_cell_excel_ref}')

    assert ob_cell is not None

    # celllookup returns a pair; only the second item (the value) is checked here.
    lookup_result = dimension.celllookup(ob_cell)
    obs_value = lookup_result[1]
    assert obs_value == expecting, f'Got "{obs_value}", expected "{expecting}"'

@then('it throws an error of type "{err_type}"')
def step(context, err_type):
    """
    Check that the exception stored on context.exc is exactly of the type
    named in the step text.
    """
    actual_type = type(context.exc)
    # NOTE(review): eval() runs the step text as Python; acceptable only
    # because feature files are trusted, in-repo input.
    expected_type = eval(err_type)
    # The message previously reported the two types the wrong way round.
    assert actual_type == expected_type, (
        f'Unexpected error type. Expected: "{expected_type}". Got: "{actual_type}".'
    )

@then('it throws an error containing the text "{err_text}"')
def step(context, err_text):
    """Check that the text of the exception stored on context.exc contains err_text."""
    exception_text = str(context.exc)
    if err_text not in exception_text:
        raise AssertionError(f'Expected error text "{err_text}", not found in {str(context.exc)}".')

@then(u'we are given the exception message')
def step_impl(context):
Expand Down