diff --git a/CONTRIBUTORS.TXT b/CONTRIBUTORS.TXT index 48602bc9..ed669f73 100644 --- a/CONTRIBUTORS.TXT +++ b/CONTRIBUTORS.TXT @@ -1,3 +1,4 @@ Piotr Mitros Oren Livne Paul Deane +Bradley Erickson diff --git a/Makefile b/Makefile index 689ba465..f22128c1 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ PACKAGES ?= wo,awe run: # If you haven't done so yet, run: make install # we need to make sure we are on the virtual env when we do this - cd learning_observer && python learning_observer --watchdog=restart + cd learning_observer && python learning_observer venv: # This is unnecessary since LO installs requirements on install. @@ -34,6 +34,7 @@ install-packages: venv pip install -e learning_observer/[${PACKAGES}] # Just a little bit of dependency hell... + # The AWE Components are built using a specific version of # `spacy`. This requires an out-of-date `typing-extensions` # package. There are few other dependecies that require a @@ -42,7 +43,16 @@ install-packages: venv # components. # TODO remove this extra step after AWE Component's `spacy` # is no longer version locked. - pip install -U typing-extensions + # This is no longer an issue, but we will leave until all + # dependecies can be resolved in the appropriate locations. + # pip install -U typing-extensions + + # On Python3.11 with tensorflow, we get some odd errors + # regarding compatibility with `protobuf`. Some installation + # files are missing from the protobuf binary on pip. + # Using the `--no-binary` option includes all files. + pip uninstall -y protobuf + pip install --no-binary=protobuf protobuf==4.25 # testing commands test: diff --git a/awe_requirements.txt b/awe_requirements.txt index 2fde4a55..04288739 100644 --- a/awe_requirements.txt +++ b/awe_requirements.txt @@ -1,3 +1,6 @@ +spacy==3.4.4 +pydantic==1.10 +spacytextblob==3.0.1 AWE_SpellCorrect @ git+https://github.com/ETS-Next-Gen/AWE_SpellCorrect.git AWE_Components @ git+https://github.com/ETS-Next-Gen/AWE_Components.git AWE_Lexica @ git+https://github.com/ETS-Next-Gen/AWE_Lexica.git diff --git a/learning_observer/learning_observer/adapters/adapter.py b/learning_observer/learning_observer/adapters/adapter.py index aacf8f12..ea456a7d 100644 --- a/learning_observer/learning_observer/adapters/adapter.py +++ b/learning_observer/learning_observer/adapters/adapter.py @@ -51,11 +51,20 @@ def dash_to_underscore(event): return event - common_transformers = [ dash_to_underscore ] +def add_common_migrator(migrator, file): + '''Add a migrator to the common transformers list. + TODO + We ought check each module on startup for migrators + and import them instead of using this function to + add them to the transformations. + ''' + print('Adding migrator', migrator, 'from', file), + common_transformers.append(migrator) + class EventAdapter: def __init__(self, metadata=None): diff --git a/learning_observer/learning_observer/communication_protocol/README.md b/learning_observer/learning_observer/communication_protocol/README.md new file mode 100644 index 00000000..86b3f798 --- /dev/null +++ b/learning_observer/learning_observer/communication_protocol/README.md @@ -0,0 +1,55 @@ +# Communication Protocol / Query Language + +## Motivation + +In our first version of this system, we would simply compile the state for all the students, and ship that to the dashboard. However, that didn't allow us to make interactive dashboards, so we created a query language. This is inspired by SQL (with JOIN and friends), but designed for streaming data. + +It can be written in Python or, soon, JavaScript, which compile queries to a JSON object. The JSON object is very similar to SQL. + +## Security model + +We allow two modes of operation: + +- **Predefined queries** are designed for production use. The client cannot make arbitrary queries. +- **Open queries** are designed for development and data analysis, for example, working from a Jupyter notebook. This allows arbitrary queries, including ones which might not be performant or which might reveal sensitive data. + +The latter should only be used in a trusted environment, and on a read replica. + +## Shorthand / Getting Started + +For common queries, we have shorthand notation, to maintain simplicity. In the majority of cases, we want just want the latest reducer data for either a single student or a classroom of students. + +In `module.py`, you see this line: + +```python +EXECUTION_DAG = learning_observer.communication_protocol.util.generate_base_dag_for_student_reducer('student_event_counter', 'my_event_module') +``` + +This is shorthand for a common query which JOINs the class roster with the output of the reducers. The Python code for the query itself is [here](https://github.com/ETS-Next-Gen/writing_observer/blob/berickson/workshop/learning_observer/learning_observer/communication_protocol/util.py#L58), but the jist of the code is: + +```python +'roster': course_roster(runtime=q.parameter('runtime'), course_id=q.parameter("course_id", required=True)), +keys_node: q.keys(f'{module}.{reducer}', STUDENTS=q.variable('roster'), STUDENTS_path='user_id'), +select_node: q.select(q.variable(keys_node), fields=q.SelectFields.All), +join_node: q.join(LEFT=q.variable(select_node), RIGHT=q.variable('roster'), LEFT_ON='provenance.provenance.value.user_id', RIGHT_ON='user_id') +``` + +You can add a `print(EXECUTION_DAG)` statement to see the JSON representation this compiles to. + +To see the data protocol, open up develop tools from your browser, click on network, and see the communication_protocol response. + +## Playing / Debugging / Interactive operations + +* `debugger.py` has a view for executing queries manually. +* `explorer.py` has a view for showing predefined queries already on the server, and running those. + +As of this writing, these are likely broken, as it has not been recently tested and there were code changes. Both of these should also: +* Be available from the Jupyter notebook in the future +* Have a command line / test case version + +## Python Query Language + + + +## JSON Query Language + diff --git a/learning_observer/learning_observer/communication_protocol/debugger.py b/learning_observer/learning_observer/communication_protocol/debugger.py index 19019395..e30f7bc5 100644 --- a/learning_observer/learning_observer/communication_protocol/debugger.py +++ b/learning_observer/learning_observer/communication_protocol/debugger.py @@ -1,6 +1,13 @@ ''' This provides a web interface for making queries via the communication protocol and seeing the text of the results. + +TODO: + +* This isn't really a debugger. Perhaps this should be called + interactive mode? Or developer mode? Or similar? +* Ideally, this should be moved to the Jupyter notebook +* Make work with the new async generator pipeline ''' from dash import html, callback, Output, Input, State @@ -11,6 +18,7 @@ import lo_dash_react_components as lodrc +# These are IDs for page elements, used in the layout and for callbacks prefix = 'communication-debugger' ws = f'{prefix}-websocket' status = f'{prefix}-connection-status' @@ -27,7 +35,7 @@ def layout(): html.H1('Communication Protocol Debugger'), lodrc.LOConnection( id=ws, - url='ws://localhost:8888/wsapi/communication_protocol' + url='ws://localhost:8888/wsapi/communication_protocol' # HACK/TODO: This might not be 8888. We should use the default port. ), html.Div(id=status) ] @@ -69,6 +77,9 @@ def layout(): def create_status(title, icon): + ''' + Are we connected to the server? Connecting? Disconnected? Used by update_status below + ''' return html.Div( [ html.I(className=f'{icon} me-1'), @@ -82,6 +93,9 @@ def create_status(title, icon): Input(ws, 'state') ) def update_status(state): + ''' + Called when we connect / disconnect / etc. + ''' icons = ['fas fa-sync-alt', 'fas fa-check text-success', 'fas fa-sync-alt', 'fas fa-times text-danger'] titles = ['Connecting to server', 'Connected to server', 'Closing connection', 'Disconnected from server'] index = state.get('readyState', 3) if state is not None else 3 @@ -93,6 +107,10 @@ def update_status(state): Input(message, 'value') ) def determine_valid_json(value): + ''' + Disable or enable to submit button, so we can only submit a + query if it is valid JSON + ''' if value is None: return True try: @@ -108,6 +126,9 @@ def determine_valid_json(value): State(message, 'value') ) def send_message(clicks, value): + ''' + Send a message to the communication protocol on the web socket. + ''' if clicks is None: raise PreventUpdate return value @@ -118,6 +139,10 @@ def send_message(clicks, value): Input(ws, 'message') ) def receive_message(message): + ''' + Shows messages from the web socket in the field with ID + `response` (defined on top) + ''' if message is None: return {} return json.loads(message.get("data", {})) diff --git a/learning_observer/learning_observer/communication_protocol/executor.py b/learning_observer/learning_observer/communication_protocol/executor.py index 6f235273..3b0fd0ea 100644 --- a/learning_observer/learning_observer/communication_protocol/executor.py +++ b/learning_observer/learning_observer/communication_protocol/executor.py @@ -18,7 +18,7 @@ import learning_observer.stream_analytics.fields import learning_observer.stream_analytics.helpers from learning_observer.log_event import debug_log -from learning_observer.util import get_nested_dict_value, clean_json +from learning_observer.util import get_nested_dict_value, clean_json, ensure_async_generator, async_zip from learning_observer.communication_protocol.exception import DAGExecutionException dispatch = learning_observer.communication_protocol.query.dispatch @@ -142,7 +142,7 @@ def substitute_parameter(parameter_name, parameters, required, default): @handler(learning_observer.communication_protocol.query.DISPATCH_MODES.JOIN) -def handle_join(left, right, left_on, right_on): +async def handle_join(left, right, left_on, right_on): """ We dispatch this function whenever we process a DISPATCH_MODES.JOIN node. Users will use this when they want to combine the output of multiple nodes. @@ -156,88 +156,69 @@ def handle_join(left, right, left_on, right_on): ``` Generic join where left.lid == right.rid - >>> handle_join( + >>> asyncio.run(async_generator_to_list(handle_join( ... left=[{'lid': 1, 'left': True}, {'lid': 2, 'left': True}], ... right=[{'rid': 2, 'right': True}, {'rid': 1, 'right': True}], ... left_on='lid', right_on='rid' - ... ) + ... ))) [{'lid': 1, 'left': True, 'rid': 1, 'right': True}, {'lid': 2, 'left': True, 'rid': 2, 'right': True}] We return every item in `left` even if they do not have a matching item in `right`. This also demonstrates the behavior for `RIGHT_ON` not being found in one of the elements of `right`. - >>> handle_join( + >>> asyncio.run(async_generator_to_list(handle_join( ... left=[{'lid': 1, 'left': True}, {'lid': 2, 'left': True}], ... right=[{'right': True}, {'rid': 1, 'right': True}], ... left_on='lid', right_on='rid' - ... ) + ... ))) [{'lid': 1, 'left': True, 'rid': 1, 'right': True}, {'lid': 2, 'left': True}] - When `LEFT_ON` is not found, we return an error. Instead of throwing exceptions, - we return errors like normal results and allow the DAG executor to handle package - them and bubble them up. This allows to only error on a singular item and allow - the others to continue running. - >>> handle_join( + When `LEFT_ON` is not found, we return an whatever is in `left`. + >>> asyncio.run(async_generator_to_list(handle_join( ... left=[{'left': True}, {'lid': 2, 'left': True}], ... right=[{'rid': 2, 'right': True}, {'rid': 1, 'right': True}], ... left_on='lid', right_on='rid' - ... ) - [{'error': "KeyError: key `lid` not found in `dict_keys(['left'])`", 'function': 'handle_join', 'error_provenance': {'target': {'left': True}, 'key': 'lid', 'exception': KeyError("Key lid not found in {'left': True}")}, 'timestamp': ... 'traceback': ... {'lid': 2, 'left': True, 'rid': 2, 'right': True}] + ... ))) + [{'left': True}, {'lid': 2, 'left': True, 'rid': 2, 'right': True}] """ right_dict = {} - for d in right: + async for d in ensure_async_generator(right): try: nested_value = get_nested_dict_value(d, right_on) right_dict[nested_value] = d except KeyError as e: pass - - result = [] - for left_dict in left: + async for left_dict in ensure_async_generator(left): try: lookup_key = get_nested_dict_value(left_dict, left_on) right_dict_match = right_dict.get(lookup_key) - if right_dict_match: merged_dict = {**left_dict, **right_dict_match} else: # defaults to left_dict if not match isn't found merged_dict = left_dict - result.append(merged_dict) + yield merged_dict except KeyError as e: - result.append(left_dict) + # TODO should we throw an error if we can't find a match in + # right or should we just yield left as is? + yield left_dict # result.append(DAGExecutionException( # f'KeyError: key `{left_on}` not found in `{left_dict.keys()}`', # inspect.currentframe().f_code.co_name, # {'target': left_dict, 'key': left_on, 'exception': e} # ).to_dict()) - return result - - -def exception_wrapper(func): - """ - When we map values across a function, we want to catch any errors that may occur. - For asynchronous functions, we are able to use `asyncio.gather` which allows us to return - exceptions as normal results. This wrapper mimics this behavior for synchronous functions - and returns any exceptions as normal results. These exceptions are later caught by the - DAG executor and handled appropriately. This allows the system to keep executing the DAG even - if some values raise exceptions. - """ - def exception_catcher(*args, **kwargs): - try: - return func(*args, **kwargs) - except Exception as e: - return e - return exception_catcher - async def map_coroutine_serial(func, values, value_path): """ We call map for coroutine functions operating in serial. See the `handle_map` function for more details regarding parameters. """ - return await asyncio.gather(*[func(get_nested_dict_value(v, value_path)) for v in values], return_exceptions=True) + async for v in ensure_async_generator(values): + try: + yield await func(get_nested_dict_value(v, value_path)), v + except Exception as e: + yield e, v async def map_coroutine_parallel(func, values, value_path): @@ -245,71 +226,98 @@ async def map_coroutine_parallel(func, values, value_path): We call map for coroutine functions operating in parallel. See the `handle_map` function for more details regarding parameters. """ - raise DAGExecutionException( - 'Asynchronous parallelization has not yet been implemented.', - inspect.currentframe().f_code.co_name, - {'function': func, 'values': values, 'value_path': value_path} - ) + async def _return_result_and_value(v): + '''Wrapper for the function to return both the result and + the value passed in. The value is yielded to annotate the + results metadata. + ''' + try: + result = await func(get_nested_dict_value(v, value_path)) + except Exception as e: + result = e + return result, v + tasks = [] + async for v in ensure_async_generator(values): + tasks.append(_return_result_and_value(v)) + for task in asyncio.as_completed(tasks): + task_result, task_value = await task + yield task_result, task_value -def map_parallel(func, values, value_path): + +async def map_parallel(func, values, value_path): """ We call map for synchronous functions operating in parallel. See the `handle_map` function for more details regarding parameters. """ - with concurrent.futures.ProcessPoolExecutor() as executor: - # TODO catch any errors from get_nested_dict_value() - futures = [executor.submit(func, get_nested_dict_value(v, value_path)) for v in values] - results = [future.result() for future in futures] - return results + def _return_result_and_value(v): + '''Wrapper for the function to return both the result and + the value passed in. The value is yielded to annotate the + results metadata. + ''' + try: + result = func(get_nested_dict_value(v, value_path)) + except Exception as e: + result = e + return result, v + + loop = asyncio.get_event_loop() + with concurrent.futures.ThreadPoolExecutor() as executor: + futures = [] + async for v in ensure_async_generator(values): + futures.append(loop.run_in_executor(executor, _return_result_and_value, v)) + for future in asyncio.as_completed(futures): + future_result, future_value = await future + yield future_result, future_value -def map_serial(func, values, value_path): +async def map_serial(func, values, value_path): """ We call map for synchronous functions operating in serial. See the `handle_map` function for more details regarding parameters. """ - outputs = [] - for v in values: + async for v in ensure_async_generator(values): try: - output = func(get_nested_dict_value(v, value_path)) - except DAGExecutionException as e: - output = e.to_dict() - outputs.append(output) - return outputs + yield func(get_nested_dict_value(v, value_path)), v + except Exception as e: + yield e, v -def annotate_map_metadata(function, results, values, value_path, func_kwargs): +async def _annotate_map_results_with_metadata(function, results, value_path, func_kwargs): """ - We annotate the list of raw results from mapping over a function with provenance - about the values passed in and the function used. Additionally, we want to - provide the proper metadata and output for any exceptions that took place - during execution of the map. + Each of the map functions yields the result (or errors) along with the value. + This function processes the output and the provenance to be further used in + the communicaton protocol. + If the result from the map is a dictionary, we use that as our base output. + If the result from the map is just a value, we wrap it in a dictionary. + If the result is an Exception, we wrap it in a DAGExecutionException and + use that as our result. This allows for some items to fail while others + were processed just fine. + Lastly, the provenance is added to our result. """ - output = [] - for res, item in zip(results, values): + async for map_result, item in results: provenance = { 'function': function, 'func_kwargs': func_kwargs, - 'value': item, - 'value_path': value_path + 'value': {k: v for k, v in item.items() if k != 'provenance'}, + 'value_path': value_path, + 'provenance': item['provenance'] if 'provenance' in item else {} } - if isinstance(res, dict): - out = res - elif isinstance(res, Exception): + if isinstance(map_result, dict): + out = map_result + elif isinstance(map_result, Exception): error_provenance = provenance.copy() - error_provenance['error'] = str(res) + error_provenance['error'] = str(map_result) out = DAGExecutionException( f'Function {function} did not execute properly during map.', inspect.currentframe().f_code.co_name, error_provenance, - res.__traceback__ + map_result.__traceback__ ).to_dict() else: - out = {'output': res} + out = {'output': map_result} out['provenance'] = provenance - output.append(out) - return output + yield out MAPS = { @@ -337,45 +345,48 @@ async def handle_map(functions, function_name, values, value_path, func_kwargs=N ... raise ValueError("Input must be an int") ... return x * 2 + >>> async def process_map_test_result(func): + ... '''The map functions return an async generator. + ... This function awaits the creation of the generator and drives it. + ... ''' + ... result = await func + ... return await async_generator_to_list(result) + Generic example of mapping a double function over [0, 1]. - >>> asyncio.run(handle_map({'double': double}, 'double', [{'path': i} for i in range(2)], 'path')) - [{'output': 0, 'provenance': {'function': 'double', 'func_kwargs': {}, 'value': {'path': 0}, 'value_path': 'path'}}, {'output': 2, 'provenance': {'function': 'double', 'func_kwargs': {}, 'value': {'path': 1}, 'value_path': 'path'}}] + >>> asyncio.run(process_map_test_result(handle_map({'double': double}, 'double', [{'path': i} for i in range(2)], 'path'))) + [{'output': 0, 'provenance': {'function': 'double', 'func_kwargs': {}, 'value': {'path': 0}, 'value_path': 'path', 'provenance': {}}}, {'output': 2, 'provenance': {'function': 'double', 'func_kwargs': {}, 'value': {'path': 1}, 'value_path': 'path', 'provenance': {}}}] Exceptions in each function with in the map are returned with normal results and handled later by the DAG executor. In our text, we return both a normal result and the result of an exception being caught. - >>> asyncio.run(handle_map({'double': double}, 'double', [{'path': i} for i in [1, 'fail']], 'path')) - [{'output': 2, 'provenance': {'function': 'double', 'func_kwargs': {}, 'value': {'path': 1}, 'value_path': 'path'}}, {'error': 'Function double did not execute properly during map.', 'function': 'annotate_map_metadata', 'error_provenance': {'function': 'double', 'func_kwargs': {}, 'value': {'path': 'fail'}, 'value_path': 'path', 'error': 'Input must be an int'}, 'timestamp': ... 'traceback': ... 'provenance': {'function': 'double', 'func_kwargs': {}, 'value': {'path': 'fail'}, 'value_path': 'path'}}] + >>> asyncio.run(process_map_test_result(handle_map({'double': double}, 'double', [{'path': i} for i in [1, 'fail']], 'path'))) + [{'output': 2, 'provenance': {'function': 'double', 'func_kwargs': {}, 'value': {'path': 1}, 'value_path': 'path', 'provenance': {}}}, {'error': 'Function double did not execute properly during map.', 'function': '_annotate_map_results_with_metadata', 'error_provenance': {'function': 'double', 'func_kwargs': {}, 'value': {'path': 'fail'}, 'value_path': 'path', 'provenance': {}, 'error': 'Input must be an int'}, 'timestamp': ..., 'traceback': ..., 'provenance': {'function': 'double', 'func_kwargs': {}, 'value': {'path': 'fail'}, 'value_path': 'path', 'provenance': {}}}] Example of trying to call nonexistent function, `triple` - >>> asyncio.run(handle_map({'double': double}, 'triple', [{'path': i} for i in range(2)], 'path')) - Traceback (most recent call last): - ... - learning_observer.communication_protocol.exception.DAGExecutionException: ('Could not find function `triple` in available functions.', 'handle_map', {'function_name': 'triple', 'available_functions': dict_keys(['double']), 'error': "'triple'"}, ...) + >>> asyncio.run(process_map_test_result(handle_map({'double': double}, 'triple', [{'path': i} for i in range(2)], 'path'))) + [{'error': 'Could not find function `triple` in available functions.', 'function': 'handle_map', 'error_provenance': {'function_name': 'triple', 'available_functions': dict_keys(['double']), 'error': "'triple'"}, 'timestamp': ..., 'traceback': ...}] """ if func_kwargs is None: func_kwargs = {} try: func = functions[function_name] except KeyError as e: - raise DAGExecutionException( + exception = DAGExecutionException( f'Could not find function `{function_name}` in available functions.', inspect.currentframe().f_code.co_name, {'function_name': function_name, 'available_functions': functions.keys(), 'error': str(e)}, e.__traceback__ - ) + ).to_dict() + return ensure_async_generator(exception) func_with_kwargs = functools.partial(func, **func_kwargs) is_coroutine = inspect.iscoroutinefunction(func) map_function = MAPS[f'map{"_coroutine" if is_coroutine else ""}_{"parallel" if parallel else "serial"}'] - if not is_coroutine: - # wrap sync functions to return errors similar to asyncio.gather - func_with_kwargs = exception_wrapper(func_with_kwargs) results = map_function(func_with_kwargs, values, value_path) if inspect.isawaitable(results): results = await results - output = annotate_map_metadata(function_name, results, values, value_path, func_kwargs) + output = _annotate_map_results_with_metadata(function_name, results, value_path, func_kwargs) return output @@ -400,8 +411,7 @@ async def handle_select(keys, fields=learning_observer.communication_protocol.qu if fields is None or fields == learning_observer.communication_protocol.query.SelectFields.Missing: fields_to_keep = {} - response = [] - for k in keys: + async for k in ensure_async_generator(keys): if isinstance(k, dict) and 'key' in k: # output from query added to response later query_response_element = { @@ -436,8 +446,7 @@ async def handle_select(keys, fields=learning_observer.communication_protocol.qu ).to_dict() # add necessary outputs to query response query_response_element[fields_to_keep[f]] = value - response.append(query_response_element) - return response + yield query_response_element # @handler(learning_observer.communication_protocol.query.DISPATCH_MODES.KEYS) @@ -459,10 +468,58 @@ def handle_keys(function, value_path, **kwargs): return unimplemented_handler() +async def _extract_fields_with_provenance_for_students(students, student_path): + '''This is a helper function for the `hack_handle_keys` function. + This function prepares the key field dictionary and the provenance + for each student. + The key field dictionary is used to create the key we are attempting + to fetch from the KVS (used later in `hack_handle_keys`). The passed in + `item_path` is used for setting the appropriate dictionary value. + The provenance is the current history of the communication protocol for each item. + ''' + async for s in ensure_async_generator(students): + s_field = get_nested_dict_value(s, student_path, '') + field = { + learning_observer.stream_analytics.fields.KeyField.STUDENT: s_field + } + provenance = s.get('provenance', {'value': s}) + provenance[student_path] = s_field + yield field, {'STUDENT': provenance} + + +async def _extract_fields_with_provenance_for_students_and_resources(students, student_path, resources, resources_path): + '''This is a helper function for the `hack_handle_keys` function. + This function prepares the key field dictionary and the provenance + for each student/resource pair. + The key field dictionary is used to create the key we are attempting + to fetch from the KVS (used later in `hack_handle_keys`). The passed in + `item_path` is used for setting the appropriate dictionary value. + The provenance is the current history of the communication protocol for each item. + ''' + async for s, r in async_zip(students, resources): + s_field = get_nested_dict_value(s, student_path, '') + r_field = get_nested_dict_value(r, resources_path, '') + fields = { + learning_observer.stream_analytics.fields.KeyField.STUDENT: s_field, + learning_observer.stream_analytics.helpers.EventField('doc_id'): r_field + } + s_provenance = s.get('provenance', {'value': s}) + s_provenance[student_path] = s_field + r_provenance = r.get('provenance', {'value': r}) + r_provenance[resources_path] = r_field + provenance = { + 'STUDENT': s_provenance, + 'RESOURCE': r_provenance + } + yield fields, provenance + + @handler(learning_observer.communication_protocol.query.DISPATCH_MODES.KEYS) -def hack_handle_keys(function, STUDENTS=None, STUDENTS_path=None, RESOURCES=None, RESOURCES_path=None): +async def hack_handle_keys(function, STUDENTS=None, STUDENTS_path=None, RESOURCES=None, RESOURCES_path=None): """ - We INSTEAD dispatch this function whenever we process a DISPATCH_MODES.KEYS node. + This function is a HACK that is being used instead of `handle_keys` for any + `DISPATCH_MODE.KEYS` nodes. + Whenever a user wants to perform a select operation, they first must make sure their keys are formatted properly. This method builds the keys to access the appropriate reducers output. @@ -472,46 +529,26 @@ def hack_handle_keys(function, STUDENTS=None, STUDENTS_path=None, RESOURCES=None associated with each. These are zipped together and returned to the user. """ func = next((item for item in learning_observer.module_loader.reducers() if item['id'] == function), None) - fields = [] - provenances = [] + fields_and_provenances = None if STUDENTS is not None and RESOURCES is None: - # handle only students - fields = [ - { - learning_observer.stream_analytics.fields.KeyField.STUDENT: get_nested_dict_value(s, STUDENTS_path) # TODO catch get_nested_dict_value errors - } for s in STUDENTS - ] - provenances = [s.get('provenance', {'value': s}) for s in STUDENTS] + fields_and_provenances = _extract_fields_with_provenance_for_students(STUDENTS, STUDENTS_path) elif STUDENTS is not None and RESOURCES is not None: - # handle both students and resources - fields = [ - { - learning_observer.stream_analytics.fields.KeyField.STUDENT: get_nested_dict_value(s, STUDENTS_path), # TODO catch get_nested_dict_value errors - learning_observer.stream_analytics.helpers.EventField('doc_id'): get_nested_dict_value(r, RESOURCES_path, '') # TODO catch get_nested_dict_value errors - } for s, r in zip(STUDENTS, RESOURCES) - ] - provenances = [ - { - 'STUDENT': s.get('provenance', {'value': s}), - 'RESOURCE': r.get('provenance', {'value': r}) - } for s, r in zip(STUDENTS, RESOURCES) - ] - - keys = [] - for f, p in zip(fields, provenances): + fields_and_provenances = _extract_fields_with_provenance_for_students_and_resources(STUDENTS, STUDENTS_path, RESOURCES, RESOURCES_path) + + if fields_and_provenances is None: + return + async for f, p in fields_and_provenances: key = learning_observer.stream_analytics.helpers.make_key( func['function'], f, learning_observer.stream_analytics.fields.KeyStateType.INTERNAL ) - keys.append( - { - 'key': key, - 'provenance': p, - 'default': func['default'] - } - ) - return keys + key_wrapper = { + 'key': key, + 'provenance': p, + 'default': func['default'] + } + yield key_wrapper def _has_error(node): @@ -598,6 +635,11 @@ def strip_provenance(variable): return variable +async def _clean_json_via_generator(iterator): + async for item in ensure_async_generator(iterator): + yield clean_json(item) + + async def execute_dag(endpoint, parameters, functions, target_exports): """ This is the primary way to execute a DAG. @@ -687,12 +729,21 @@ async def visit(node_name): # Include execution history in output if operating in development settings if learning_observer.settings.RUN_MODE == learning_observer.settings.RUN_MODES.DEV: - return {e: clean_json(await visit(e)) for e in target_nodes} - + return {e: _clean_json_via_generator(await visit(e)) for e in target_nodes} + + # HACK currently `dashboard.py` relies on the provenance to tell users which + # items need updating, such as John Doe's history essay. This ought to be + # handled by the communication protocol during execution. Once that occurs, + # we can go back to stripping the provenance out. + return {e: _clean_json_via_generator(await visit(e)) for e in target_nodes} + # TODO test this code to make sure it works with async generators # Remove execution history if in deployed settings, with data flowing back to teacher dashboards - return {e: clean_json(strip_provenance(await visit(e))) for e in target_nodes} + return {e: _clean_json_via_generator(strip_provenance(await visit(e))) for e in target_nodes} if __name__ == "__main__": import doctest + # This function is used by doctests + from learning_observer.util import async_generator_to_list + doctest.testmod(optionflags=doctest.ELLIPSIS) diff --git a/learning_observer/learning_observer/communication_protocol/explorer.py b/learning_observer/learning_observer/communication_protocol/explorer.py index ebe45db3..4b4775d8 100644 --- a/learning_observer/learning_observer/communication_protocol/explorer.py +++ b/learning_observer/learning_observer/communication_protocol/explorer.py @@ -4,6 +4,10 @@ * The available queries * The DAG execution graphs associated with those * Parameters to said queries + +TODO at some point during development this broke and no longer +displays the DAGs correctly. This was a hacked together prototype +so efforts to fix have been pushed to wayside. ''' from dash import html, dcc, callback, Output, Input, State diff --git a/learning_observer/learning_observer/communication_protocol/test_cases.py b/learning_observer/learning_observer/communication_protocol/test_cases.py index fe3b6574..63822cc5 100644 --- a/learning_observer/learning_observer/communication_protocol/test_cases.py +++ b/learning_observer/learning_observer/communication_protocol/test_cases.py @@ -12,6 +12,7 @@ Test Cases: docs_with_roster Prints the dummy Google Docs text DAG. map_example Prints the map example test cases. + map_async_example Prints the map async example test cases. parameter_error Prints the parameter test case. field_error Prints the missing fields test case. malformed_key Prints the malformed key test case. @@ -26,12 +27,15 @@ import copy import docopt import json +import random import sys +import time import learning_observer.communication_protocol.executor import learning_observer.communication_protocol.integration import learning_observer.communication_protocol.query as q import learning_observer.communication_protocol.util +import learning_observer.communication_protocol.exception import learning_observer.constants as constants import learning_observer.offline @@ -56,7 +60,14 @@ def dummy_exception(): raise Exception('This is an exception that was raised in a published function.') -async def dummy_map(value, example): +async def dummy_async_map(value, example): + await asyncio.sleep(1) + if value.endswith('2'): + raise ValueError('Item ends with a 2') + return {'value': value, 'example': example} + +def dummy_sync_map(value, example): + time.sleep(1) if value.endswith('2'): raise ValueError('Item ends with a 2') return {'value': value, 'example': example} @@ -65,12 +76,14 @@ async def dummy_map(value, example): DUMMY_FUNCTIONS = { "learning_observer.dummyroster": dummy_roster, "learning_observer.dummycall": dummy_exception, - "learning_observer.dummymap": dummy_map + "learning_observer.dummyasyncmap": dummy_async_map, + "learning_observer.dummymap": dummy_sync_map } course_roster = q.call('learning_observer.dummyroster') exception_func = q.call('learning_observer.dummycall') map_func = q.call('learning_observer.dummymap') +async_map_func = q.call('learning_observer.dummyasyncmap') TEST_DAG = { 'execution_dag': { @@ -78,7 +91,8 @@ async def dummy_map(value, example): "doc_ids": q.select(q.keys('writing_observer.last_document', STUDENTS=q.variable("roster"), STUDENTS_path='user_id'), fields={'document_id': 'doc_id'}), "docs": q.select(q.keys('writing_observer.reconstruct', STUDENTS=q.variable("roster"), STUDENTS_path='user_id', RESOURCES=q.variable("doc_ids"), RESOURCES_path='doc_id'), fields={'text': 'text'}), "docs_join_roster": q.join(LEFT=q.variable("docs"), RIGHT=q.variable("roster"), LEFT_ON='provenance.provenance.STUDENT.value.user_id', RIGHT_ON='user_id'), - "map_students": q.map(map_func, q.variable('roster'), 'user_id', {'example': 123}), + "map_students": q.map(map_func, q.variable('roster'), 'user_id', {'example': 123}, parallel=True), + "map_async_students": q.map(async_map_func, q.variable('roster'), 'user_id', {'example': 123}, parallel=True), "field_error": q.select(q.keys('writing_observer.last_document', STUDENTS=q.variable("roster"), STUDENTS_path='user_id'), fields={'nonexistent_key': 'doc_id'}), "malformed_key_error": q.select([{'item': 1}, {'item': 2}], fields={'nonexistent_key': 'doc_id'}), "call_exception": exception_func(), @@ -101,12 +115,19 @@ async def dummy_map(value, example): "description": 'Show example of mapping students', 'expected': lambda x: isinstance(x, list) and 'value' in x[0] }, + 'map_async_example': { + 'returns': 'map_async_students', + 'parameters': ['course_id'], + 'test_parameters': {'course_id': 123}, + "description": 'Show example of mapping students', + 'expected': lambda x: isinstance(x, list) and 'value' in x[0] + }, 'parameter_error': { 'returns': 'docs_join_roster', 'parameters': ['course_id'], 'test_parameters': {}, "description": "Fetches student doc text; however, this errors since we do not provide the necessary parameters.", - 'expected': lambda x: isinstance(x, dict) and 'error' in x + 'expected': lambda x: isinstance(x, list) and 'error' in x[0] }, 'field_error': { 'returns': 'field_error', @@ -127,8 +148,11 @@ async def dummy_map(value, example): 'parameters': [], 'test_parameters': {}, 'description': "Throw an exception within a published function", - 'expected': lambda x: isinstance(x, dict) and 'error' in x + 'expected': lambda x: isinstance(x, list) and 'error' in x[0] }, + # TODO this test case fails and was failing before switching to an async generator + # Should we be erroring if the `left_on` path doesn't exist or just yielding `left` + # as is? Currently, `executor.py` yields `left`. 'join_key_error': { 'returns': 'join_key_error', 'parameters': [], @@ -141,13 +165,13 @@ async def dummy_map(value, example): 'parameters': [], 'test_parameters': {}, 'description': 'Test out circular node errors', - 'expected': lambda x: isinstance(x, dict) and 'error' in x + 'expected': lambda x: isinstance(x, list) and 'error' in x[0] } } } -def run_test_cases(test_cases, verbose=False): +async def run_test_cases(test_cases, verbose=False): """ Run all test cases. Print output from the ones specified. @@ -164,21 +188,24 @@ def run_test_cases(test_cases, verbose=False): print(f"Invalid test case. Available test cases are: {available_test_cases}") sys.exit() - learning_observer.offline.init() + learning_observer.offline.init('creds.yaml') for key in TEST_DAG['exports']: FLAT = learning_observer.communication_protocol.util.flatten(copy.deepcopy(TEST_DAG)) - EXECUTE = asyncio.run( - learning_observer.communication_protocol.executor.execute_dag( - copy.deepcopy(FLAT), parameters=TEST_DAG['exports'][key]['test_parameters'], - functions=DUMMY_FUNCTIONS, target_exports=[key] - ) + EXECUTE = await learning_observer.communication_protocol.executor.execute_dag( + copy.deepcopy(FLAT), parameters=TEST_DAG['exports'][key]['test_parameters'], + functions=DUMMY_FUNCTIONS, target_exports=[key] ) if (key in test_cases or 'all' in test_cases) and 'none' not in test_cases: print(f"Executing {key}") if verbose: print(json.dumps(EXECUTE, indent=2)) - assert (TEST_DAG['exports'][key]['expected'](EXECUTE[TEST_DAG['exports'][key]['returns']])) + + try: + driven_gen = [i async for i in EXECUTE[TEST_DAG['exports'][key]['returns']]] + except learning_observer.communication_protocol.exception.DAGExecutionException as e: + driven_gen = e.to_dict() + assert (TEST_DAG['exports'][key]['expected'](driven_gen)) print(' Received expected output.') @@ -190,4 +217,4 @@ def run_test_cases(test_cases, verbose=False): if args[''] == []: print(__doc__) sys.exit() - run_test_cases(args[''], args['--verbose']) + asyncio.run(run_test_cases(args[''], args['--verbose'])) diff --git a/learning_observer/learning_observer/communication_protocol/util.py b/learning_observer/learning_observer/communication_protocol/util.py index ac53b8b5..fc3f8f8c 100644 --- a/learning_observer/learning_observer/communication_protocol/util.py +++ b/learning_observer/learning_observer/communication_protocol/util.py @@ -13,7 +13,10 @@ def _flatten_helper(top_level, current_level, prefix=''): """ - Flatten the dictionary. + This is a helper function for taking a dictionary of nested + calls to the communication protocol, such as `select(keys(...))`, + and converting them to a flat dictionary. E.g. one item for + the `select` call and one item for the `key` call. :param top_level: The top level dictionary :type top_level: dict @@ -42,7 +45,11 @@ def _flatten_helper(top_level, current_level, prefix=''): def flatten(endpoint): """ - Flatten the endpoint. + The DAG is provided as a complex dictinoary structure. This function + flattens the dictionary to a single layer. + A query with a node `select(keys(...))` would start with a single + dictionary item and be translated to one for the `select` and another + for the `keys` portion. :param endpoint: The endpoint dictionary :type endpoint: dict @@ -56,6 +63,13 @@ def flatten(endpoint): def generate_base_dag_for_student_reducer(reducer, module): + ''' + A very common use-case is that we want the latest reducer output for a specific reducer for one course. + + This is a shorthand way to do it. + + TODO: We should probably give this a better name. Hopefully quickly, before a lot of code depends on this. E.g. `predefined_query.course_reducer`, `predefined_query.student_reducer`, etc. (so also move them into their own namespace). + ''' course_roster = q.call('learning_observer.courseroster') keys_node = f'{reducer}_keys' select_node = f'{reducer}_output' diff --git a/learning_observer/learning_observer/dashboard.py b/learning_observer/learning_observer/dashboard.py index 1d3e0774..8d06bfd3 100644 --- a/learning_observer/learning_observer/dashboard.py +++ b/learning_observer/learning_observer/dashboard.py @@ -1,11 +1,17 @@ ''' This generates dashboards from student data. + +TODO much of this file is no longer being used and the +unused code ought to be removed. We have iterated on how +we do this a few times and have landed in a much better +place than we started. ''' import asyncio import copy import inspect import json +import aiohttp.client_exceptions import jsonschema import numbers import pmss @@ -279,6 +285,9 @@ async def websocket_dashboard_view(request): ''' Handler to aggregate student data, and serve it back to the client every half-second to second or so. + + TODO remove this method. This is the old way of passing data from + the server to the client (pre communication protocol). ''' # Extract parameters from the URL # @@ -436,30 +445,34 @@ def fully_qualify_names_with_default_namespace(dag, namespace_prefix): return dag -async def dispatch_named_execution_dag(dag_name, funcs): +async def dispatch_named_execution_dag(dag_name): + '''This method takes a Named Query and fetches it from the + available DAGs on the system. + ''' available_dags = learning_observer.module_loader.execution_dags() query = None try: query = available_dags[dag_name] except KeyError: debug_log(await dag_not_found(dag_name)) - funcs.append(dag_not_found(dag_name)) finally: return query -async def dispatch_defined_execution_dag(dag, funcs): +async def dispatch_defined_execution_dag(dag): + '''This method confirms that an Open Queries provided by the user + are 1) allowed to be submitted and 2) adhere to the appropriate + JSON structure. + ''' query = None if not learning_observer.settings.pmss_settings.dangerously_allow_insecure_dags(): debug_log(await dag_submission_not_allowed()) - funcs.append(dag_submission_not_allowed()) return query try: learning_observer.communication_protocol.schema.prevalidate_schema(dag) query = dag except jsonschema.ValidationError as e: debug_log(await dag_incorrect_format(e)) - funcs.append(dag_incorrect_format(e)) return query finally: return query @@ -468,49 +481,93 @@ async def dispatch_defined_execution_dag(dag, funcs): DAG_DISPATCH = {dict: dispatch_defined_execution_dag, str: dispatch_named_execution_dag} -async def execute_queries(client_data, request): +async def _handle_dependent_dags(query): + ''' + Handles dependent DAGs and ensures all dependencies are present. + NOTE dependent dags only work for on a single level dependency + TODO allow multiple layers of dependency among dags + ''' execution_dags = learning_observer.module_loader.execution_dags() - funcs = [] - # client_data = { - # 'output_name': { - # 'execution_dag': 'writing_obssdfsderver', - # 'target_exports': ['docs_with_roster'], - # 'kwargs': {'course_id': 12345} - # }, - # } - for query_name, client_query in client_data.items(): - dag = client_query.get('execution_dag', query_name) - - if type(dag) not in DAG_DISPATCH: - debug_log(await dag_unsupported_type(type(dag))) - funcs.append(dag_unsupported_type(type(dag))) - continue - - query = await DAG_DISPATCH[type(dag)](dag, funcs) - if query is None: - continue - - # NOTE dependent dags only work for on a single level dependency - # TODO allow multiple layers of dependency among dags - dependent_dags = extract_namespaced_dags(query['execution_dag']) - missing_dags = dependent_dags - execution_dags.keys() - if missing_dags: - debug_log(await dag_not_found(missing_dags)) - funcs.append(dag_not_found(missing_dags)) - continue - for dep in dependent_dags: - dep_dag = copy.deepcopy(execution_dags[dep]['execution_dag']) - prefixed_dag = fully_qualify_names_with_default_namespace(dep_dag, dep) - query['execution_dag'] = {**query['execution_dag'], **{f'{dep}.{k}': v for k, v in prefixed_dag.items()}} - - target_exports = client_query.get('target_exports', []) - query_func = learning_observer.communication_protocol.integration.prepare_dag_execution(query, target_exports) - client_parameters = client_query.get('kwargs', {}) - runtime = learning_observer.runtime.Runtime(request) - client_parameters['runtime'] = runtime - query_func = query_func(**client_parameters) - funcs.append(query_func) - return await asyncio.gather(*funcs, return_exceptions=False) + dependent_dags = extract_namespaced_dags(query['execution_dag']) + missing_dags = dependent_dags - execution_dags.keys() + + if missing_dags: + # TODO we ought to handle this as an error + debug_log(await dag_not_found(missing_dags)) + return + + for dep in dependent_dags: + # Copy and qualify names for dependent DAG + dep_dag = copy.deepcopy(execution_dags[dep]['execution_dag']) + prefixed_dag = fully_qualify_names_with_default_namespace(dep_dag, dep) + + # Merge dependent DAG with current query + query['execution_dag'] = {**query['execution_dag'], **{f'{dep}.{k}': v for k, v in prefixed_dag.items()}} + + return query + + +async def _prepare_dag_as_generator(client_query, query, target, request): + ''' + Prepares the query for execution, sets up client parameters and runtime. + ''' + target_exports = [target] + + # Prepare the DAG execution function + query_func = learning_observer.communication_protocol.integration.prepare_dag_execution(query, target_exports) + + # Handle client parameters and runtime setup + client_parameters = client_query.get('kwargs', {}).copy() + runtime = learning_observer.runtime.Runtime(request) + client_parameters['runtime'] = runtime + + # Execute the query and return the first value from the generator + generator_dictionary = await query_func(**client_parameters) + return next(iter(generator_dictionary.values())) + + +async def _create_dag_generator(client_query, target, request): + dag = client_query['execution_dag'] + if type(dag) not in DAG_DISPATCH: + debug_log(await dag_unsupported_type(type(dag))) + return + + query = await DAG_DISPATCH[type(dag)](dag) + if query is None: + # the DAG_DISPATCH prints a more detailed message about why + debug_log('The submitted query failed.') + return + query = await _handle_dependent_dags(query) + return await _prepare_dag_as_generator(client_query, query, target, request) + + +def _find_student_or_resource(d): + '''HACK the communication protocol does not provide an easy way to + determine which student or student/document pair is being updated. + The protocol does include a provenance key with each item that includes + the history of what occured within the protocol. + In production settings, the provenance should be removed from the + user output. However, this method assumes that the provenance is still + around. + This method digs into the provenance and extracts the corresponding + student or student/document id. This information is used to tell the + client which items in their data-tree to update (i.e. update Billy's + History Essay with this new information). + ''' + if not isinstance(d, dict): + return [] + if 'provenance' in d: + provenance = d['provenance'] + output = [] + if 'STUDENT' in provenance: + output.append(provenance['STUDENT']['user_id']) + if 'RESOURCE' in provenance: + output.append('documents') + output.append(provenance['RESOURCE']['doc_id']) + if output: + return output + return _find_student_or_resource(provenance) + return [] @learning_observer.auth.teacher @@ -527,12 +584,75 @@ async def websocket_dashboard_handler(request): ''' ws = aiohttp.web.WebSocketResponse(receive_timeout=0.3) await ws.prepare(request) - client_data = None + client_query = None + previous_client_query = None + batch = [] + lock = asyncio.Lock() + + async def _send_update(update): + '''Send an update to our batch + ''' + async with lock: + batch.append(update) + + async def _batch_send(): + '''If our batch has any items, send them to the client + then wait before checking again. + ''' + while True: + async with lock: + if batch: + try: + await ws.send_json(batch) + batch.clear() + except aiohttp.web_ws.WebSocketError: + break + except aiohttp.client_exceptions.ClientConnectionResetError: + break + if ws.closed: + break + # TODO this ought to be pulled from somewhere + await asyncio.sleep(1) + + async def _execute_dag(dag_query, target, params): + '''This method creates the DAG generator and drives it. + Once finished, we wait until rescheduling it. If the parameters + change, we exit before creating and driving the generator. + ''' + if params != client_query: + # the params are different and we should stop this generator + return + + # Create DAG generator and drive + generator = await _create_dag_generator(dag_query, target, request) + await _drive_generator(generator, dag_query['kwargs']) + + # Handle rescheduling the execution of the DAG for fresh data + dag_delay = dag_query['kwargs'].get('rerun_dag_delay', 10) + if dag_delay < 0: + # if dag_delay is negative, we skip repeated execution + return + await asyncio.sleep(dag_delay) + await _execute_dag(dag_query, target, params) + + async def _drive_generator(generator, dag_kwargs): + '''For each item in the generator, this method creates + an update to send to the client. + ''' + async for item in generator: + scope = _find_student_or_resource(item) + update_path = ".".join(scope) + if 'option_hash' in dag_kwargs: + item['option_hash'] = dag_kwargs['option_hash'] + await _send_update({'op': 'update', 'path': update_path, 'value': item}) + + send_batches = asyncio.create_task(_batch_send()) while True: try: - client_data = await ws.receive_json() - # TODO we should validate the client_data structure + received_params = await ws.receive_json() + client_query = received_params + # TODO we should validate the client_query structure except (TypeError, ValueError): # these Errors may signal a close if (await ws.receive()).type == aiohttp.WSMsgType.CLOSE: @@ -540,20 +660,23 @@ async def websocket_dashboard_handler(request): return aiohttp.web.Response() except asyncio.exceptions.TimeoutError: # this is the normal path of the code - # if the client_data hasn't been set, keep waiting for it - if client_data is None: + # if the client_query hasn't been set, keep waiting for it + if client_query is None: continue if ws.closed: debug_log("Socket closed.") return aiohttp.web.Response() - outputs = await execute_queries(client_data, request) - - await ws.send_json({q: v for q, v in zip(client_data.keys(), outputs)}) - # TODO allow the client to set the update timer. - # it would be cool if the client could set different sleep timers for each item - await asyncio.sleep(3) + if client_query != previous_client_query: + previous_client_query = copy.deepcopy(client_query) + # HACK even though we can specificy multiple targets for a + # single DAG, this creates a new DAG for each. This eventually + # allows us to specify different parameters (such as the + # reschedule timeout). + for k, v in client_query.items(): + for target in v.get('target_exports', []): + asyncio.create_task(_execute_dag(v, target, client_query)) # Obsolete code -- we should put this back in after our refactor. Allows us to use diff --git a/learning_observer/learning_observer/incoming_student_event.py b/learning_observer/learning_observer/incoming_student_event.py index 26451827..a3d6c029 100644 --- a/learning_observer/learning_observer/incoming_student_event.py +++ b/learning_observer/learning_observer/incoming_student_event.py @@ -369,7 +369,19 @@ async def handle_auth_events(events): async for event in events: if 'auth' in event: - raise ValueError('Auth already exists in event, someone may be trying to hack the system') + ''' + If 'auth' already exists, this means + 1. Someone is trying to hack the system + 2. Someone is restreaming logs into the system + We should record the current auth to history and + then remove it from the event. The `.authenticate` + function will take care of re-authorizing the user. + + TODO determine how to store the auth history and append + current auth object. + ''' + del event['auth'] + if not authenticated: authenticated = await learning_observer.auth.events.authenticate( request=request, diff --git a/learning_observer/learning_observer/static/webapp.html b/learning_observer/learning_observer/static/webapp.html index 7ca747d6..5238f5d2 100644 --- a/learning_observer/learning_observer/static/webapp.html +++ b/learning_observer/learning_observer/static/webapp.html @@ -43,8 +43,8 @@

Learning Observer - by Piotr Mitros. Copyright - (c) 2020-2022. Educational Testing + by Piotr Mitros and Bradley Erickson. Copyright + (c) 2020-2024. Educational Testing Service. The source code is available under the diff --git a/learning_observer/learning_observer/util.py b/learning_observer/learning_observer/util.py index ade27781..96140943 100644 --- a/learning_observer/learning_observer/util.py +++ b/learning_observer/learning_observer/util.py @@ -8,7 +8,8 @@ We can relax the design invariant, but we should think carefully before doing so. ''' - +import asyncio +import collections import dash.development.base_component import datetime import enum @@ -16,7 +17,6 @@ import math import numbers import re -import socket import uuid from dateutil import parser @@ -252,6 +252,53 @@ def generate_unique_token(): return f'{count}-{timestamp()}-{str(uuid.uuid4())}' +async def ensure_async_generator(it): + '''Take an iterable or single dict item and return it + as an async generator. + ''' + if isinstance(it, dict): + yield it + elif isinstance(it, collections.abc.AsyncIterable): + # If it is already an async iterable, yield from it + async for item in it: + yield item + elif isinstance(it, collections.abc.Iterable): + # If it is a synchronous iterable, iterate over it and yield items + for item in it: + yield item + else: + raise TypeError(f"Object of type {type(it)} is not iterable") + + +async def async_zip(iterator1, iterator2): + '''Zip 2 async generators together. + This functions similar to `zip` + ''' + gen1 = ensure_async_generator(iterator1) + gen2 = ensure_async_generator(iterator2) + try: + while True: + # asyncio.gather finishes when both `anext` items are ready + item1, item2 = await asyncio.gather( + gen1.__anext__(), + gen2.__anext__() + ) + yield item1, item2 + except StopAsyncIteration: + pass + + +async def async_generator_to_list(gen): + '''This is a helper function for converting an async generator + to a list. This is often used when testing pieces of an async + generator pipeline. + ''' + result = [] + async for item in gen: + result.append(item) + return result + + # And a test case if __name__ == '__main__': assert to_safe_filename('{') == '-123-' diff --git a/learning_observer/util/restream.py b/learning_observer/util/restream.py index a7624da6..3641b692 100644 --- a/learning_observer/util/restream.py +++ b/learning_observer/util/restream.py @@ -71,31 +71,35 @@ async def restream( async with session.ws_connect(url) as web_socket: async with aiofiles.open(filename) as log_file: async for line in log_file: + if filename.endswith('.study.log'): + # HACK the `.study.log` include the event along + # with a timestamp + json_line = json.loads(line.split('\t')[0]) + else: + json_line = json.loads(line) + if rate is not None: - jline = json.loads(line) - if jline['client']['event'] in skip: + if json_line['client']['event'] in skip: continue - new_ts = jline["server"]["time"] + new_ts = json_line["server"]["time"] if old_ts is not None: delay = (new_ts - old_ts) / rate if max_wait is not None: - delay = min(delay, max_wait) + delay = min(delay, float(max_wait)) print(line) print(delay) await asyncio.sleep(delay) old_ts = new_ts if extract_client or rename: - json_line = json.loads(line) if extract_client: json_line = json_line['client'] - print(json.dumps(json_line, indent=2)) if rename: if 'auth' not in json_line: json_line['auth'] = {} json_line['auth']['user_id'] = new_id - line = json.dumps(json_line) + jline = json.dumps(json_line) - await web_socket.send_str(line.strip()) + await web_socket.send_str(jline.strip()) return True diff --git a/modules/lo_dash_react_components/MANIFEST.in b/modules/lo_dash_react_components/MANIFEST.in index 6700bd3b..9d0565fb 100644 --- a/modules/lo_dash_react_components/MANIFEST.in +++ b/modules/lo_dash_react_components/MANIFEST.in @@ -9,4 +9,5 @@ include lo_dash_react_components/package-info.json recursive-include lo_dash_react_components/css *.css include README.md include LICENSE -include package.json \ No newline at end of file +include package.json +include requirements.txt diff --git a/modules/lo_dash_react_components/lo_dash_react_components/LOConnectionAIO.py b/modules/lo_dash_react_components/lo_dash_react_components/LOConnectionAIO.py new file mode 100644 index 00000000..70de0ef4 --- /dev/null +++ b/modules/lo_dash_react_components/lo_dash_react_components/LOConnectionAIO.py @@ -0,0 +1,168 @@ +''' +This file creates an All-In-One component for the Learning +Observer server connection. This handles updating data from the +server (based on individual tree updates), storing any errors +that occured, and showing the time since it was last updated. +''' +from dash import html, dcc, clientside_callback, Output, Input, State, MATCH +import uuid + +from .LOConnection import LOConnection + +class LOConnectionAIO(html.Div): + class ids: + websocket = lambda aio_id: { + 'component': 'LOConnectionAIO', + 'subcomponent': 'websocket', + 'aio_id': aio_id + } + connection_status = lambda aio_id: { + 'component': 'LOConnectionAIO', + 'subcomponent': 'connection_status', + 'aio_id': aio_id + } + last_updated_store = lambda aio_id: { + 'component': 'LOConnectionAIO', + 'subcomponent': 'last_updated_store', + 'aio_id': aio_id + } + last_updated_time = lambda aio_id: { + 'component': 'LOConnectionAIO', + 'subcomponent': 'last_updated_time', + 'aio_id': aio_id + } + last_updated_interval = lambda aio_id: { + 'component': 'LOConnectionAIO', + 'subcomponent': 'last_updated_interval', + 'aio_id': aio_id + } + ws_store = lambda aio_id: { + 'component': 'LOConnectionAIO', + 'subcomponent': 'ws_store', + 'aio_id': aio_id + } + error_store = lambda aio_id: { + 'component': 'LOConnectionAIO', + 'subcomponent': 'error_store', + 'aio_id': aio_id + } + + ids = ids + + def __init__(self, aio_id=None, data_scope=None): + if aio_id is None: + aio_id = str(uuid.uuid4()) + + # Determine which state we are in + component = [ + html.I(id=self.ids.connection_status(aio_id)), + html.Span('Last Updated:', className='mx-1'), + html.Span(id=self.ids.last_updated_time(aio_id)), + dcc.Interval(id=self.ids.last_updated_interval(aio_id), interval=5000), + LOConnection(id=self.ids.websocket(aio_id), data_scope=data_scope), + dcc.Store(id=self.ids.last_updated_store(aio_id), data=-1), + dcc.Store(id=self.ids.ws_store(aio_id), data={}), + dcc.Store(id=self.ids.error_store(aio_id), data={}) + ] + super().__init__(component) + + # Update connection status information + clientside_callback( + '''function (status) { + const icons = ['fas fa-sync-alt', 'fas fa-check text-success', 'fas fa-sync-alt', 'fas fa-times text-danger']; + const titles = ['Connecting to server', 'Connected to server', 'Closing connection', 'Disconnected from server']; + if (status === undefined) { + return [icons[3], titles[3]]; + } + return [icons[status.readyState], titles[status.readyState]]; + } + ''', + Output(ids.connection_status(MATCH), 'className'), + Output(ids.connection_status(MATCH), 'title'), + Input(ids.websocket(MATCH), 'state'), + ) + + # Update connection last modified text + clientside_callback( + '''function (lastTime, intervals) { + if (lastTime === -1) { + return 'Never'; + } + const currTime = new Date(); + const secondDiff = (currTime.getTime() - lastTime.getTime())/1000 + if (secondDiff < 1) { + return 'just now' + } + const ms_since_last_message = rendertime2(secondDiff); + return `${ms_since_last_message} ago`; + } + ''', + Output(ids.last_updated_time(MATCH), 'children'), + Input(ids.last_updated_store(MATCH), 'data'), + Input(ids.last_updated_interval(MATCH), 'n_intervals') + ) + + # Update when the data was last modified + clientside_callback( + '''function (data) { + if (data !== undefined) { + return new Date(); + } + return window.dash_clientside.no_update; + }''', + Output(ids.last_updated_store(MATCH), 'data'), + Input(ids.websocket(MATCH), 'message') + ) + + # Handle incoming message from server + clientside_callback( + '''function (incomingMessage, currentData, errorStore) { + // console.log('LOConnection', incomingMessage, currentData, errorStore); + if (incomingMessage !== undefined) { + const messages = JSON.parse(incomingMessage.data); + messages.forEach(message => { + const pathKeys = message.path.split('.'); + let current = currentData; + + // Traverse the path to get to the right location + for (let i = 0; i < pathKeys.length - 1; i++) { + const key = pathKeys[i]; + if (!(key in current)) { + current[key] = {}; // Create path if it doesn't exist + } + current = current[key]; + } + + if ('error' in message.value) { + errorStore[message.path] = message.value; + } else { + delete errorStore[message.path]; + } + const finalKey = pathKeys[pathKeys.length - 1]; + if (message.op === 'update') { + if (current[finalKey] === undefined) { + current[finalKey] = {}; + } + if ('error' in message.value) { + current[finalKey]['error'] = message.value; + current[finalKey]['option_hash'] = message.value['option_hash']; + } else { + delete current[finalKey]['error']; + // Shallow merge using spread syntax + current[finalKey] = { + ...current[finalKey], // Existing data + ...message.value // New data (overwrites where necessary) + }; + } + } + }); + return [currentData, errorStore]; // Return updated data + } + return window.dash_clientside.no_update; + }''', + Output(ids.ws_store(MATCH), 'data'), + Output(ids.error_store(MATCH), 'data'), + Input(ids.websocket(MATCH), 'message'), + State(ids.ws_store(MATCH), 'data'), + State(ids.error_store(MATCH), 'data') + ) diff --git a/modules/lo_dash_react_components/lo_dash_react_components/LOConnectionStatusAIO.py b/modules/lo_dash_react_components/lo_dash_react_components/LOConnectionStatusAIO.py new file mode 100644 index 00000000..aeb8c4ce --- /dev/null +++ b/modules/lo_dash_react_components/lo_dash_react_components/LOConnectionStatusAIO.py @@ -0,0 +1,106 @@ +''' +This file creates an All-In-One component for the Learning +Observer server connection. This handles updating data from the +server and showing the time since it was last updated. + +TODO this file is still being used by the cookiecutter module. +This was replaced by LOConnectionAIO to utilize the new method +of updating data. +''' +from dash import html, dcc, clientside_callback, Output, Input, MATCH +import uuid + +from .LOConnection import LOConnection + +class LOConnectionStatusAIO(html.Div): + class ids: + websocket = lambda aio_id: { + 'component': 'LOConnectionStatus', + 'subcomponent': 'websocket', + 'aio_id': aio_id + } + connection_status = lambda aio_id: { + 'component': 'LOConnectionStatus', + 'subcomponent': 'connection_status', + 'aio_id': aio_id + } + last_updated_store = lambda aio_id: { + 'component': 'LOConnectionStatus', + 'subcomponent': 'last_updated_store', + 'aio_id': aio_id + } + last_updated_time = lambda aio_id: { + 'component': 'LOConnectionStatus', + 'subcomponent': 'last_updated_time', + 'aio_id': aio_id + } + last_updated_interval = lambda aio_id: { + 'component': 'LOConnectionStatus', + 'subcomponent': 'last_updated_interval', + 'aio_id': aio_id + } + + ids = ids + + def __init__(self, aio_id=None, data_scope=None): + if aio_id is None: + aio_id = str(uuid.uuid4()) + + # Determine which state we are in + component = [ + html.I(id=self.ids.connection_status(aio_id)), + html.Span('Last Updated:', className='mx-1'), + html.Span(id=self.ids.last_updated_time(aio_id)), + dcc.Interval(id=self.ids.last_updated_interval(aio_id), interval=5000), + LOConnection(id=self.ids.websocket(aio_id), data_scope=data_scope), + dcc.Store(id=self.ids.last_updated_store(aio_id), data=-1) + ] + super().__init__(component) + + clientside_callback( + # ClientsideFunction(namespace='lo_dash_react_components', function_name='update_connection_status_icon'), + '''function (status) { + const icons = ['fas fa-sync-alt', 'fas fa-check text-success', 'fas fa-sync-alt', 'fas fa-times text-danger']; + const titles = ['Connecting to server', 'Connected to server', 'Closing connection', 'Disconnected from server']; + if (status === undefined) { + return [icons[3], titles[3]]; + } + return [icons[status.readyState], titles[status.readyState]]; + } + ''', + Output(ids.connection_status(MATCH), 'className'), + Output(ids.connection_status(MATCH), 'title'), + Input(ids.websocket(MATCH), 'state'), + ) + + clientside_callback( + # ClientsideFunction(namespace='lo_dash_react_components', function_name='update_connection_last_modified_text'), + '''function (lastTime, intervals) { + if (lastTime === -1) { + return 'Never'; + } + const currTime = new Date(); + const secDiff = (currTime.getTime() - lastTime.getTime())/1000 + if (secDiff < 1) { + return 'just now' + } + const ms_since_last_message = rendertime2(secDiff); + return `${ms_since_last_message} ago`; + } + ''', + Output(ids.last_updated_time(MATCH), 'children'), + Input(ids.last_updated_store(MATCH), 'data'), + Input(ids.last_updated_interval(MATCH), 'n_intervals') + ) + + clientside_callback( + # ClientsideFunction(namespace='lo_dash_react_components', function_name='update_connection_last_modified_store'), + '''function (data) { + if (data !== undefined) { + return new Date(); + } + return window.dash_clientside.no_update; + }''', + Output(ids.last_updated_store(MATCH), 'data'), + Input(ids.websocket(MATCH), 'message') + ) diff --git a/modules/lo_dash_react_components/lo_dash_react_components/ProfileSidebarAIO.py b/modules/lo_dash_react_components/lo_dash_react_components/ProfileSidebarAIO.py new file mode 100644 index 00000000..0205429c --- /dev/null +++ b/modules/lo_dash_react_components/lo_dash_react_components/ProfileSidebarAIO.py @@ -0,0 +1,82 @@ +''' +This file creates an All-In-One component for a sidebar +component that allows users to navigate throughout the platform. +The sidebar shows a Home and Logout button as well as a list +of available dashboards. +''' +from dash import html, clientside_callback, Output, Input, State, MATCH +import dash_bootstrap_components as dbc +import uuid + +class ProfileSidebarAIO(html.Div): + class ids: + toggle_open = lambda aio_id: { + 'component': 'ProfileSidebarAIO', + 'subcomponent': 'toggle_open', + 'aio_id': aio_id + } + offcanvas = lambda aio_id: { + 'component': 'ProfileSidebarAIO', + 'subcomponent': 'offcanvas', + 'aio_id': aio_id + } + modules = lambda aio_id: { + 'component': 'ProfileSidebarAIO', + 'subcomponent': 'module_list', + 'aio_id': aio_id + } + + ids = ids + + def __init__(self, aio_id=None, class_name='', color='primary'): + if aio_id is None: + aio_id = str(uuid.uuid4()) + + component = [ + dbc.Button(html.I(className='fas fa-user'), id=self.ids.toggle_open(aio_id), color=color, class_name=class_name), + dbc.Offcanvas([ + dbc.Button([html.I(className='fas fa-home me-1'), 'Home'], href='/', external_link=True), + html.H4('Modules'), + html.Ul(id=self.ids.modules(aio_id)), + dbc.Button([html.I(className='fas fa-right-from-bracket me-1'), 'Logout'], color='danger', href='/auth/logout', external_link=True), + ], title='Profile', id=self.ids.offcanvas(aio_id), placement='end') + ] + super().__init__(component) + + # Toggle sidebar + clientside_callback( + '''function (clicks, isOpen) { + if (clicks > 0) { return !isOpen; } + return isOpen; + } + ''', + Output(ids.offcanvas(MATCH), 'is_open'), + Input(ids.toggle_open(MATCH), 'n_clicks'), + State(ids.offcanvas(MATCH), 'is_open') + ) + + # Update available dashboard items + clientside_callback( + # TODO include the course_id in these - will need to parse it out of the current string + '''async function (empty) { + const response = await fetch(`${window.location.protocol}//${window.location.hostname}:${window.location.port}/webapi/course_dashboards`); + + const modules = await response.json(); + const items = modules.map((x) => { + const link = { + namespace: 'dash_html_components', + type: 'A', + props: { children: x.name, href: x.url + window.location.hash } + } + return { + namespace: 'dash_html_components', + type: 'Li', + props: { children: link } + } + }) + return items; + } + ''', + Output(ids.modules(MATCH), 'children'), + Input(ids.modules(MATCH), 'className'), + ) diff --git a/modules/lo_dash_react_components/lo_dash_react_components/__init__.py b/modules/lo_dash_react_components/lo_dash_react_components/__init__.py index 95134eec..4c6fecb3 100644 --- a/modules/lo_dash_react_components/lo_dash_react_components/__init__.py +++ b/modules/lo_dash_react_components/lo_dash_react_components/__init__.py @@ -10,6 +10,10 @@ from ._imports_ import * from ._imports_ import __all__ +from .LOConnectionStatusAIO import LOConnectionStatusAIO +from .LOConnectionAIO import LOConnectionAIO +from .ProfileSidebarAIO import ProfileSidebarAIO + if not hasattr(_dash, '__plotly_dash') and not hasattr(_dash, 'development'): print('Dash was not successfully imported. ' 'Make sure you don\'t have a file ' diff --git a/modules/lo_dash_react_components/src/lib/components/LOCards.css b/modules/lo_dash_react_components/src/lib/components/LOCards.css index d77dee7d..54f16ae2 100644 --- a/modules/lo_dash_react_components/src/lib/components/LOCards.css +++ b/modules/lo_dash_react_components/src/lib/components/LOCards.css @@ -4,7 +4,7 @@ align-items: center; } -.card { +.lo-card { border: 1px solid #ccc; border-radius: 8px; padding: 16px; diff --git a/modules/lo_dash_react_components/src/lib/components/LOCards.react.js b/modules/lo_dash_react_components/src/lib/components/LOCards.react.js index 60c2728c..c5890f10 100644 --- a/modules/lo_dash_react_components/src/lib/components/LOCards.react.js +++ b/modules/lo_dash_react_components/src/lib/components/LOCards.react.js @@ -7,7 +7,7 @@ import "./LOCards.css"; const LOCard = ({ title, description }) => { return ( -

+

{title}

{description}

diff --git a/modules/lo_dash_react_components/src/lib/components/LONameTag.react.js b/modules/lo_dash_react_components/src/lib/components/LONameTag.react.js index 36368352..87970be9 100644 --- a/modules/lo_dash_react_components/src/lib/components/LONameTag.react.js +++ b/modules/lo_dash_react_components/src/lib/components/LONameTag.react.js @@ -7,6 +7,13 @@ import PropTypes from "prop-types"; export default class LONameTag extends Component { render() { const { id, profile, className, includeName } = this.props; + + // Check for the existence of necessary profile keys + const hasValidPhotoUrl = profile?.photo_url && profile.photo_url !== '//lh3.googleusercontent.com/a/default-user'; + const givenName = profile?.name?.given_name ?? ''; + const familyName = profile?.name?.family_name ?? ''; + const fullName = profile?.name?.full_name ?? ''; + return (
{ - (profile.photo_url & profile.photo_url !== '//lh3.googleusercontent.com/a/default-user') - ? - : {`${profile.name.given_name.slice(0,1)}${profile.name.family_name.slice(0,1)}`} + hasValidPhotoUrl + ? + : {`${givenName.slice(0, 1)}${familyName.slice(0, 1)}`} } - {includeName ? {profile.name.full_name} : } + {includeName ? {fullName} : }
- ) + ); } } + LONameTag.defaultProps = { id: "", className: "", diff --git a/modules/lo_dash_react_components/src/lib/components/WOAnnotatedText.react.js b/modules/lo_dash_react_components/src/lib/components/WOAnnotatedText.react.js index aaf961f1..27593c54 100644 --- a/modules/lo_dash_react_components/src/lib/components/WOAnnotatedText.react.js +++ b/modules/lo_dash_react_components/src/lib/components/WOAnnotatedText.react.js @@ -1,90 +1,80 @@ -import React, { Component } from 'react' -import PropTypes from 'prop-types' -import OverlayTrigger from 'react-bootstrap/OverlayTrigger' -import Popover from 'react-bootstrap/Popover' +import React, { Component } from 'react'; +import PropTypes from 'prop-types'; +import OverlayTrigger from 'react-bootstrap/OverlayTrigger'; +import Popover from 'react-bootstrap/Popover'; -import 'react-tooltip/dist/react-tooltip.css' +import 'react-tooltip/dist/react-tooltip.css'; /** * WOAnnotatedText */ export default class WOAnnotatedText extends Component { constructor(props) { - super(props) + super(props); this.state = { selectedItem: null - } - } - - handleOverlap = (chunks, text) => { - return chunks.reduce((prev, curr) => { - const lastChunk = prev[prev.length - 1] - if (lastChunk && lastChunk.end > curr.start) { - const commonText = text.substring(curr.start, lastChunk.end) - const remainderText = text.substring(lastChunk.end, curr.end) - const newLastChunk = { ...lastChunk, text: text.substring(lastChunk.start, curr.start) } - const commonChunk = { - text: commonText, - annotated: true, - id: `${newLastChunk.id}-${curr.id}`, - start: curr.start, - end: lastChunk.end, - tooltip: lastChunk.tooltip.concat(curr.tooltip), - style: { ...curr.style, ...lastChunk.style } - } - const newChunk = { - text: remainderText, - annotated: true, - id: curr.id, - start: lastChunk.end, - end: curr.end, - tooltip: curr.tooltip, - style: curr.style - } - return [...prev.slice(0, prev.length - 1), newLastChunk, commonChunk, newChunk] - } else { - return [...prev, curr] - } - }, []) + }; } replaceNewLines = (str) => { - const split = str.split('\n') + const split = str.split('\n'); if (split.length > 1) { return split.map((line, index) => ( {line} {split.length-1 === index ? :
}
- )) + )); } - return str + return str; } render() { - const { breakpoints, text, className } = this.props - const sortedList = [...breakpoints].sort((a, b) => a.start - b.start) - let chunks = sortedList.reduce((prev, { start, offset, tooltip, style }, index) => { - const lastOffset = prev.length ? prev[prev.length - 1].end : 0 - if (start > lastOffset) { - prev.push({ - text: text.substring(lastOffset, start), + const { breakpoints, text, className } = this.props; + + const breaks = new Set(); + breakpoints.forEach(obj => { + breaks.add(obj.start); + breaks.add(obj.start + obj.offset); + }); + breaks.add(0); + breaks.add(text.length); + + const ids = {}; + breaks.forEach(item => { + ids[item] = []; + }); + + const breaksList = [...breaks].sort((a, b) => a - b); + let matchingBreaks = []; + + breakpoints.forEach(obj => { + matchingBreaks = breaksList.filter(v => (v >= obj.start & v < (obj.start + obj.offset))); + matchingBreaks.forEach(b => { + ids[b] = ids[b].concat({ tooltip: obj.tooltip, style: obj.style }); + }); + }) + + const chunks = Array(breaksList.length - 1); + let curr, textChunk; + for (let i = 0; i < chunks.length; i++) { + curr = ids[breaksList[i]]; + textChunk = text.substring(breaksList[i], breaksList[i+1]); + if (curr.length === 0) { + chunks[i] = { + text: textChunk, annotated: false - }) + }; + } else { + chunks[i] = { + text: textChunk, + annotated: true, + id: i, + tooltip: curr.map(o => o.tooltip), + style: curr[0].style + }; } - prev.push({ - text: text.substring(start, start + offset), - annotated: true, - id: index, - start: start, - end: start + offset, - tooltip: [tooltip], - style: style - }) - return prev - }, []) - - chunks = this.handleOverlap(chunks, text) + } if (chunks.length === 0) { return
@@ -97,7 +87,7 @@ export default class WOAnnotatedText extends Component { chunks.push({ text: text.substring(chunks[chunks.length - 1].end), annotated: false - }) + }); } return (
@@ -111,14 +101,18 @@ export default class WOAnnotatedText extends Component { Annotations - {chunk.tooltip} +
    + {[...new Set(chunk.tooltip)].map((item, index) => ( +
  • + {item} +
  • + ))} +
} > - + {this.replaceNewLines(chunk.text)} @@ -127,7 +121,7 @@ export default class WOAnnotatedText extends Component { ))}
- ) + ); } } @@ -156,7 +150,7 @@ WOAnnotatedText.propTypes = { id: PropTypes.string, start: PropTypes.number, offset: PropTypes.number, - tooltip: PropTypes.node, + tooltip: PropTypes.string, style: PropTypes.object })), diff --git a/modules/lo_dash_react_components/src/lib/components/WOSettings.react.js b/modules/lo_dash_react_components/src/lib/components/WOSettings.react.js new file mode 100644 index 00000000..2295b8f0 --- /dev/null +++ b/modules/lo_dash_react_components/src/lib/components/WOSettings.react.js @@ -0,0 +1,148 @@ +import React, { Component } from 'react'; +import PropTypes from 'prop-types'; + +function generateNewHighlightColor () { + // Generate random RGB values + const r = Math.floor(Math.random() * 128) + 128; // 128-255 for brighter colors + const g = Math.floor(Math.random() * 128) + 128; + const b = Math.floor(Math.random() * 128) + 128; + + // Convert RGB to hex + const hex = `#${((1 << 24) + (r << 16) + (g << 8) + b).toString(16).slice(1)}`; + return hex; +} + +function sortOptionsIntoTree (options) { + // Create a map of options by their ids + const optionsMap = new Map(); + options.forEach(option => optionsMap.set(option.id, option)); + + // Initialize an array to store the sorted options + const sortedOptions = []; + + // Function to recursively add children to the sorted array + function addChildren (parentId, depth) { + options + .filter(option => option.parent === parentId) + .forEach(option => { + sortedOptions.push({ ...option, depth }); + addChildren(option.id, depth + 1); // Recursively add children + }); + } + + // Start by adding top-level items (those with an empty parent) + addChildren('', 0); + + return sortedOptions; +} + +/** + * WOSettings is a generic settings interface. + * User can define + */ +export default class WOSettings extends Component { + constructor (props) { + super(props); + this.handleRowEvent = this.handleRowEvent.bind(this); + this.renderRow = this.renderRow.bind(this); + } + + handleRowEvent (event, key, type, colorPicker = false) { + const { setProps, options } = this.props; + const oldOptions = structuredClone(options); + const current = oldOptions.find(option => option.id === key); + if (colorPicker) { + current.types[type].color = event.target.value; + } else { + const { checked } = event.target; + current.types[type].value = checked; + current.types[type].color = current.types[type].color || generateNewHighlightColor(); + } + setProps({ options: oldOptions }); + } + + renderRow (row) { + const highlightCell = (row.types && 'highlight' in row.types) + ? (<> + this.handleRowEvent(e, row.id, 'highlight')} /> + {row.types.highlight.value + ? this.handleRowEvent(e, row.id, 'highlight', true)} /> + : null} + ) + : null; + const metricCell = (row.types && 'metric' in row.types) + ? this.handleRowEvent(e, row.id, 'metric')} /> + : null; + return ( + + {'\u00A0'.repeat(row.depth * 2) + row.label} + {highlightCell} + {/* {metricCell} */} + + ); + } + + render () { + const { id, className, options } = this.props; + const rows = sortOptionsIntoTree(options); + // TODO due to a HACK with passing data to the child component of + // the student tiles, we currently only support a single child and + // expect it to be the highlighted text component. + return ( + + + + + + {/* */} + + + + {rows.map((r) => this.renderRow(r))} + +
NameHighlightMetric
+ ); + } +}; + +WOSettings.defaultProps = { + id: '', + className: '', + options: {} +}; + +WOSettings.propTypes = { + /** + * The ID used to identify this component in Dash callbacks. + */ + id: PropTypes.string, + + /** + * Classes for the outer most div. + */ + className: PropTypes.string, + + /** + * Dash-assigned callback that should be called to report property changes + * to Dash, to make them available for callbacks. + */ + setProps: PropTypes.func, + + /** + * Array of available options + */ + options: PropTypes.arrayOf(PropTypes.exact({ + id: PropTypes.string, + label: PropTypes.string, + parent: PropTypes.string, + types: PropTypes.oneOfType([ + PropTypes.object, + PropTypes.undefined + ]) + })) + +}; diff --git a/modules/lo_dash_react_components/src/lib/components/WOSettings.testdata.js b/modules/lo_dash_react_components/src/lib/components/WOSettings.testdata.js new file mode 100644 index 00000000..b4815610 --- /dev/null +++ b/modules/lo_dash_react_components/src/lib/components/WOSettings.testdata.js @@ -0,0 +1,13 @@ + +const testData = { + options: [ + { id: 'a1', label: 'A1', parent: 'a' }, + { id: 'a2', types: { highlight: {}, metric: {} }, label: 'A2', parent: 'a' }, + { id: 'a1a', types: { highlight: {}, metric: {} }, label: 'A1A', parent: 'a1' }, + { id: 'a', label: 'A', parent: '' }, + { id: 'b', types: { highlight: {}, metric: {} }, label: 'B', parent: '' }, + { id: 'c', types: { highlight: {}, metric: {} }, label: 'C', parent: '' } + ] +}; + +export default testData; diff --git a/modules/lo_dash_react_components/src/lib/components/WOStudentTextTile.react.js b/modules/lo_dash_react_components/src/lib/components/WOStudentTextTile.react.js new file mode 100644 index 00000000..041bf875 --- /dev/null +++ b/modules/lo_dash_react_components/src/lib/components/WOStudentTextTile.react.js @@ -0,0 +1,155 @@ +import React, { Component } from 'react'; +import PropTypes from 'prop-types'; +import Card from 'react-bootstrap/Card'; +import Form from 'react-bootstrap/Form'; + +import LONameTag from './LONameTag.react'; + +function createGoogleDocumentURL (docId) { + return `https://docs.google.com/document/d/${docId}`; +} + +/** + * WOStudentTextTile + */ +export default class WOStudentTextTile extends Component { + constructor (props) { + super(props); + this.handleDocumentSelectChange = this.handleDocumentSelectChange.bind(this); + } + + handleDocumentSelectChange (event) { + this.props.setProps({ selectedDocument: event.target.value }); + } + + render () { + const { id, className, style, showHeader, studentInfo, selectedDocument, currentOptionHash, childComponent } = this.props; + // HACK we need to pass the appropriate student information into the child component + childComponent.props._dashprivate_layout.props = {...studentInfo.documents[selectedDocument]}; + + const documentIsSelected = selectedDocument && studentInfo.documents[selectedDocument]; + const isLoading = documentIsSelected && currentOptionHash !== studentInfo.documents[selectedDocument].optionHash; + let bodyClassName = isLoading ? 'loading' : ''; + bodyClassName = `${bodyClassName} overflow-auto position-relative`; + + const loadedItem = documentIsSelected + ? <>{childComponent} + :
Document information not found.
; + + // TODO the chunk of commented code allows for linking directly to the selected document + // and allows the user to select which document they wish to see at a given moment. + // Neither of these features are currently available due to limitations with the communication + // protocol. For now they are being commented out so users are not inclined to use them. + return ( + + + + {/*
+ + + + + {studentInfo.availableDocuments.map(doc => ( + + ))} + */} + + {isLoading && ( +
+
+ Loading... +
+ )} + + {loadedItem} + + + ); + } +} + +WOStudentTextTile.defaultProps = { + className: '', + showHeader: true, + style: {}, + studentInfo: { + profile: {}, + availableDocuments: [], + documents: {} + } +}; + +WOStudentTextTile.propTypes = { + /** + * The ID used to identify this component in Dash callbacks. + */ + id: PropTypes.string, + + /** + * Classes for the outer most div. + */ + className: PropTypes.string, + + /** + * Style to apply to the outer most item. This + * is usually used to set the size of the tile. + */ + style: PropTypes.object, + + /** + * Determine whether the header with the student + * name should be visible or not + */ + showHeader: PropTypes.bool, + + /** + * Which document is currently selected for this student + */ + selectedDocument: PropTypes.string, + + /** + * Hash of the current options, used to determine if we + * should be in a loading state or not. + */ + currentOptionHash: PropTypes.string, + + /** + * Component to use for within the card body + */ + childComponent: PropTypes.node, + + /** + * The breakpoints of our text + */ + studentInfo: PropTypes.exact({ + profile: PropTypes.object, + availableDocuments: PropTypes.arrayOf(PropTypes.exact({ + id: PropTypes.string, + title: PropTypes.string + })), + documents: PropTypes.object + // objectOf( + // PropTypes.shape({ + // text: PropTypes.string, + // breakpoints: PropTypes.arrayOf(PropTypes.any), + // optionHash: PropTypes.string + // }) + // ) + }), + + /** + * Dash-assigned callback that should be called to report property changes + * to Dash, to make them available for callbacks. + */ + setProps: PropTypes.func +}; diff --git a/modules/lo_dash_react_components/src/lib/components/WOStudentTextTile.testdata.js b/modules/lo_dash_react_components/src/lib/components/WOStudentTextTile.testdata.js new file mode 100644 index 00000000..0d297f06 --- /dev/null +++ b/modules/lo_dash_react_components/src/lib/components/WOStudentTextTile.testdata.js @@ -0,0 +1,52 @@ +const testData = { + id: 'example', + showHeader: true, + currentOptionHash: '123', + studentInfo: { + availableDocuments: [ + { id: '1_2V-Npp1L0G3cw4lcH_ENSo_y_OV1BP3s8NdnwaFbVw', title: 'Document A' }, + { id: 'docB', title: 'Document B' }, + { id: 'docC', title: 'Document C' } + ], + profile: { + email_address: 'example@example.com', + name: { + family_name: 'Doe', + full_name: 'John Doe', + given_name: 'John' + }, + photo_url: '//lh3.googleusercontent.com/a/default-user' + }, + documents: { + '1_2V-Npp1L0G3cw4lcH_ENSo_y_OV1BP3s8NdnwaFbVw': { + optionHash: '123', + text: "This summer was AMAZING!!! First, I went to the beach with my family for two whole weeks! We stayed in this super cool beach house that had its own private pool and hot tub. My siblings and I spent hours playing in the waves and building sandcastles on the beach. We even went on a snorkeling trip and saw some really cool fish!\n\nWhen we weren't at the beach, I hung out with my friends and we had a blast! We went to the trampoline park and played laser tag. I also started reading this really good book called \"The Giver\" and it was sooo good that I couldn't put it down.\n\nMy parents took me and my siblings on a road trip to visit our grandparents in another state. It was a pretty long drive, but we made some great memories along the way. We stopped at a few theme parks and went on some really cool rides. My favorite one was this roller coaster that had loops and corkscrews!\n\nAt home, I started working on my own little garden project. I planted some flowers and herbs, and even tried to grow my own tomatoes (which didn't quite work out as planned...). It was pretty cool seeing everything grow and flourish.\n\nOverall, this summer was definitely the best one yet!", + breakpoints: [ + { + id: 'split0', + tooltip: 'This is the first tooltip', + start: 220, + offset: 5, + style: { textDecoration: 'underline' } + }, + { + id: 'split1', + tooltip: 'This is a tooltip', + start: 240, + offset: 25, + style: { textDecoration: 'underline' } + }, + { + id: 'split2', + tooltip: 'This is another tooltip', + start: 310, + offset: 15, + style: { backgroundColor: 'green' } + } + ] + } + } + } +}; + +export default testData; diff --git a/modules/lo_dash_react_components/src/lib/index.js b/modules/lo_dash_react_components/src/lib/index.js index 0124cfdd..61fe4081 100644 --- a/modules/lo_dash_react_components/src/lib/index.js +++ b/modules/lo_dash_react_components/src/lib/index.js @@ -5,6 +5,8 @@ import LOCollapse from './components/LOCollapse.react'; import WOAnnotatedText from './components/WOAnnotatedText.react'; import WOMetrics from './components/WOMetrics.react'; import WOIndicatorBars from './components/WOIndicatorBars.react'; +import WOSettings from './components/WOSettings.react'; +import WOStudentTextTile from './components/WOStudentTextTile.react'; import WOTextHighlight from './components/WOTextHighlight.react'; import StudentSelectHeader from './components/StudentSelectHeader.react'; import LOTextMinibars from './components/LOTextMinibars.react'; @@ -17,6 +19,8 @@ export { LOPanelLayout, LOCollapse, WOAnnotatedText, + WOStudentTextTile, + WOSettings, WOMetrics, WOIndicatorBars, WOTextHighlight, diff --git a/modules/wo_bulk_essay_analysis/wo_bulk_essay_analysis/assets/scripts.js b/modules/wo_bulk_essay_analysis/wo_bulk_essay_analysis/assets/scripts.js index 9826c6d4..fe5c7756 100644 --- a/modules/wo_bulk_essay_analysis/wo_bulk_essay_analysis/assets/scripts.js +++ b/modules/wo_bulk_essay_analysis/wo_bulk_essay_analysis/assets/scripts.js @@ -3,22 +3,34 @@ */ if (!window.dash_clientside) { - window.dash_clientside = {} + window.dash_clientside = {}; } -pdfjsLib.GlobalWorkerOptions.workerSrc = '/static/3rd_party/pdf.worker.min.js' +pdfjsLib.GlobalWorkerOptions.workerSrc = '/static/3rd_party/pdf.worker.min.js'; + +const createStudentCard = async function (s, prompt) { + // TODO this ought to come from the comm protocol + const document = Object.keys(s.documents)[0]; + const student = s.documents[document]; + const promptHash = await hashObject({ prompt }); -const createStudentCard = function (student, prompt) { const header = { namespace: 'dash_bootstrap_components', type: 'CardHeader', props: { children: student.profile.name.full_name } - } + }; const studentText = { namespace: 'lo_dash_react_components', type: 'WOAnnotatedText', props: { text: student.text, breakpoints: [], className: 'border-end' } - } + }; + const errorMessage = { + namespace: 'dash_html_components', + type: 'Div', + props: { + children: 'An error occurred while processing the text.' + } + }; const feedbackMessage = { namespace: 'dash_html_components', type: 'Div', @@ -27,30 +39,35 @@ const createStudentCard = function (student, prompt) { className: student?.feedback ? 'p-1 overflow-auto' : '', style: { whiteSpace: 'pre-line' } } - } + }; const feedbackLoading = { namespace: 'dash_html_components', type: 'Div', props: { - children: { + children: [{ namespace: 'dash_bootstrap_components', type: 'Spinner', props: {} - }, + }, { + namespace: 'dash_html_components', + type: 'Div', + props: { children: 'Waiting for a response.' } + }], className: 'text-center' } - } - const feedback = prompt === student.prompt ? feedbackMessage : feedbackLoading + }; + const feedback = promptHash === student.option_hash ? feedbackMessage : feedbackLoading; + const feedbackOrError = 'error' in student ? errorMessage : feedback; const body = { namespace: 'lo_dash_react_components', type: 'LOPanelLayout', props: { children: studentText, - panels: [{ children: feedback, id: 'feedback-text', width: '40%' }], + panels: [{ children: feedbackOrError, id: 'feedback-text', width: '40%' }], shown: ['feedback-text'], className: 'overflow-auto p-1' } - } + }; const card = { namespace: 'dash_bootstrap_components', type: 'Card', @@ -58,25 +75,33 @@ const createStudentCard = function (student, prompt) { children: [header, body], style: { maxHeight: '375px' } } - } + }; return { namespace: 'dash_bootstrap_components', type: 'Col', props: { children: card, id: student.user_id, - width: 4 + xs: 12, + lg: 6, + xxl: 4 } - } -} + }; +}; + +const checkForResponse = function (s, promptHash) { + const document = Object.keys(s.documents)[0]; + const student = s.documents[document]; + return promptHash === student.option_hash; +}; const charactersAfterChar = function (str, char) { - const commaIndex = str.indexOf(char) + const commaIndex = str.indexOf(char); if (commaIndex === -1) { - return '' + return ''; } - return str.slice(commaIndex + 1).trim() -} + return str.slice(commaIndex + 1).trim(); +}; const extractPDF = async function (base64String) { const pdfData = atob(charactersAfterChar(base64String, ',')) @@ -101,7 +126,7 @@ const extractPDF = async function (base64String) { const allText = allTexts.join('\n') return allText -} +}; window.dash_clientside.bulk_essay_feedback = { /** @@ -109,35 +134,48 @@ window.dash_clientside.bulk_essay_feedback = { */ send_to_loconnection: async function (state, hash, clicks, docSrc, docDate, docTime, query, systemPrompt, tags) { if (state === undefined) { - return window.dash_clientside.no_update + return window.dash_clientside.no_update; } if (state.readyState === 1) { - if (hash.length === 0) { return window.dash_clientside.no_update } - const decoded = decode_string_dict(hash.slice(1)) - if (!decoded.course_id) { return window.dash_clientside.no_update } + if (hash.length === 0) { return window.dash_clientside.no_update; } + const decoded = decode_string_dict(hash.slice(1)); + if (!decoded.course_id) { return window.dash_clientside.no_update; } decoded.gpt_prompt = ''; decoded.message_id = ''; decoded.doc_source = docSrc; decoded.requested_timestamp = new Date(`${docDate}T${docTime}`).getTime().toString(); + // TODO what is a reasonable time to wait inbetween subsequent calls for + // the same arguments + decoded.rerun_dag_delay = 120; - const trig = window.dash_clientside.callback_context.triggered[0] + const trig = window.dash_clientside.callback_context.triggered[0]; if (trig.prop_id.includes('bulk-essay-analysis-submit-btn')) { decoded.gpt_prompt = query; decoded.system_prompt = systemPrompt; decoded.tags = tags; } + const optionsHash = await hashObject({ prompt: decoded.gpt_prompt }); + decoded.option_hash = optionsHash; + const message = { wo: { execution_dag: 'wo_bulk_essay_analysis', target_exports: ['gpt_bulk'], kwargs: decoded } - } - return JSON.stringify(message) + }; + return JSON.stringify(message); } - return window.dash_clientside.no_update + return window.dash_clientside.no_update; + }, + + toggleAdvanced: function (clicks, isOpen) { + if (!clicks) { + return window.dash_clientside.no_update; + } + return !isOpen; }, /** @@ -171,24 +209,29 @@ window.dash_clientside.bulk_essay_feedback = { namespace: 'dash_html_components', type: 'Li', props: { children: x } - } - }) + }; + }); return { namespace: 'dash_html_components', type: 'Ol', props: { children: items } - } + }; }, /** * update student cards based on new data in storage */ - update_student_grid: function (message, history) { - const currPrompt = history.length > 0 ? history[history.length - 1] : '' - const cards = message.map((x) => { - return createStudentCard(x, currPrompt) - }) - return cards + updateStudentGridOutput: async function (wsStorageData, history) { + if (!wsStorageData) { + return 'No students'; + } + const currPrompt = history.length > 0 ? history[history.length - 1] : ''; + + let output = []; + for (const student in wsStorageData) { + output = output.concat(await createStudentCard(wsStorageData[student], currPrompt)); + } + return output; }, /** @@ -201,14 +244,14 @@ window.dash_clientside.bulk_essay_feedback = { */ open_and_populate_attachment_panel: async function (contents, filename, timestamp, shown) { if (filename === undefined) { - return ['', '', shown] + return ['', '', shown]; } let data = '' if (filename.endsWith('.pdf')) { - data = await extractPDF(contents) + data = await extractPDF(contents); } // TODO add support for docx-like files - return [data, filename.slice(0, filename.lastIndexOf('.')), shown.concat('attachment')] + return [data, filename.slice(0, filename.lastIndexOf('.')), shown.concat('attachment')]; }, /** @@ -232,19 +275,22 @@ window.dash_clientside.bulk_essay_feedback = { * - submit query button disbaled status * - helper text for why we disabled the submit query button */ - disabled_query_submit: function (query, store) { + disableQuerySubmitButton: function (query, loading, store) { if (query.length === 0) { - return [true, 'Please create a request before submitting.'] + return [true, 'Please create a request before submitting.']; + } + if (loading) { + return [true, 'Please wait until current query has finished before resubmitting.']; } - const tags = Object.keys(store) - const queryTags = query.match(/[^{}]+(?=})/g) || [] - const diffs = queryTags.filter(x => !tags.includes(x)) + const tags = Object.keys(store); + const queryTags = query.match(/[^{}]+(?=})/g) || []; + const diffs = queryTags.filter(x => !tags.includes(x)); if (diffs.length > 0) { - return [true, `Unable to find [${diffs.join(',')}] within the tags. Please check that the spelling is correct or remove the extra tags.`] + return [true, `Unable to find [${diffs.join(',')}] within the tags. Please check that the spelling is correct or remove the extra tags.`]; } else if (!queryTags.includes('student_text')) { - return [true, 'Submission requires the inclusion of {student_text} to run the request over the student essays.'] + return [true, 'Submission requires the inclusion of {student_text} to run the request over the student essays.']; } - return [false, ''] + return [false, '']; }, /** @@ -267,7 +313,7 @@ window.dash_clientside.bulk_essay_feedback = { * populate word bank of tags */ update_tag_buttons: function (tagStore) { - const tagLabels = Object.keys(tagStore) + const tagLabels = Object.keys(tagStore); const tags = tagLabels.map((val) => { const button = { namespace: 'dash_bootstrap_components', @@ -279,10 +325,10 @@ window.dash_clientside.bulk_essay_feedback = { size: 'sm', color: 'secondary' } - } - return button - }) - return tags + }; + return button; + }); + return tags; }, /** @@ -306,8 +352,8 @@ window.dash_clientside.bulk_essay_feedback = { * - show alert * - JSON error data on the alert (only in debug) */ - update_alert_with_error: function (error) { - if (!error) { + updateAlertWithError: function (error) { + if (Object.keys(error).length === 0) { return ['', false, '']; } const text = 'Oops! Something went wrong ' + @@ -316,5 +362,27 @@ window.dash_clientside.bulk_essay_feedback = { 'exploring a different dashboard for now. ' + 'Thanks for your patience!'; return [text, true, error]; + }, + + disable_doc_src_datetime: function (value) { + if (value === 'ts') { + return [false, false]; + } + return [true, true]; + }, + + updateLoadingInformation: async function (wsStorageData, history) { + const noLoading = [false, 0, '']; + if (!wsStorageData) { + return noLoading; + } + const currentPrompt = history.length > 0 ? history[history.length - 1] : ''; + const promptHash = await hashObject({ prompt: currentPrompt }); + const returnedResponses = Object.values(wsStorageData).filter(student => checkForResponse(student, promptHash)).length; + const totalStudents = Object.keys(wsStorageData).length; + if (totalStudents === returnedResponses) { return noLoading; } + const loadingProgress = returnedResponses / totalStudents + 0.1; + const outputText = `Fetching responses from server. This will take a few minutes. (${returnedResponses}/${totalStudents} received)`; + return [true, loadingProgress, outputText]; } }; diff --git a/modules/wo_bulk_essay_analysis/wo_bulk_essay_analysis/dashboard/layout.py b/modules/wo_bulk_essay_analysis/wo_bulk_essay_analysis/dashboard/layout.py index f2fb946d..e3ee9a69 100644 --- a/modules/wo_bulk_essay_analysis/wo_bulk_essay_analysis/dashboard/layout.py +++ b/modules/wo_bulk_essay_analysis/wo_bulk_essay_analysis/dashboard/layout.py @@ -13,9 +13,8 @@ DEBUG_FLAG = True prefix = 'bulk-essay-analysis' -websocket = f'{prefix}-websocket' -ws_store = f'{prefix}-ws-store' -error_store = f'{prefix}-error-store' +_websocket = f'{prefix}-websocket' +_namespace = 'bulk_essay_feedback' alert = f'{prefix}-alert' alert_text = f'{prefix}-alert-text' @@ -25,13 +24,16 @@ panel_layout = f'{prefix}-panel-layout' -advanced_collapse = f'{prefix}-advanced-collapse' +_advanced_toggle = f'{prefix}-advanced-toggle' +_advanced_collapse = f'{prefix}-advanced-collapse' + system_input = f'{prefix}-system-prompt-input' -# document source +# document source DOM ids doc_src = f'{prefix}-doc-src' doc_src_date = f'{prefix}-doc-src-date' doc_src_timestamp = f'{prefix}-doc-src-timestamp' +# attachment upload DOM ids attachment_upload = f'{prefix}-attachment-upload' attachment_label = f'{prefix}-attachment-label' attachment_extracted_text = f'{prefix}-attachment-extracted-text' @@ -39,14 +41,23 @@ attachment_warning_message = f'{prefix}-attachment-warning-message' attachment_store = f'{prefix}-attachment-store' +# placeholder DOM ids tags = f'{prefix}-tags' +placeholder_tooltip = f'{prefix}-placeholder-tooltip' tag = f'{prefix}-tag' tag_store = f'{prefix}-tags-store' +# prompt history DOM ids history_body = f'{prefix}-history-body' history_store = f'{prefix}-history-store' favorite_store = f'{prefix}-favorite-store' +# loading message/bar DOM ids +_loading_prefix = f'{prefix}-loading' +_loading_collapse = f'{_loading_prefix}-collapse' +_loading_progress = f'{_loading_prefix}-progress-bar' +_loading_information = f'{_loading_prefix}-information-text' + submit = f'{prefix}-submit-btn' submit_warning_message = f'{prefix}-submit-warning-msg' grid = f'{prefix}-essay-grid' @@ -63,29 +74,27 @@ def layout(): Generic layout function to create dashboard ''' # advanced menu for system prompt - advanced = dbc.Col([ - lodrc.LOCollapse([ + advanced = [ + html.Div([ + dbc.Label('System prompt'), + dbc.Textarea(id=system_input, value=system_prompt) + ]), + html.Div([ + dbc.Label('Document Source'), + dbc.RadioItems(options=[ + {'label': 'Latest Document', 'value': 'latest' }, + {'label': 'Specific Time', 'value': 'ts'}, + ], value='latest', id=doc_src), dbc.InputGroup([ - dbc.InputGroupText('System prompt:'), - dbc.Textarea(id=system_input, value=system_prompt) - ]), - html.Div([ - dbc.Label('Document Source'), - dbc.RadioItems(options=[ - {'label': 'Latest Document', 'value': 'latest' }, - {'label': 'Specific Time', 'value': 'ts'}, - ], value='latest', id=doc_src), - dbc.InputGroup([ - dcc.DatePickerSingle(id=doc_src_date, date=datetime.date.today()), - dbc.Input(type='time', id=doc_src_timestamp, value=datetime.datetime.now().strftime("%H:%M")) - ]) + dcc.DatePickerSingle(id=doc_src_date, date=datetime.date.today()), + dbc.Input(type='time', id=doc_src_timestamp, value=datetime.datetime.now().strftime("%H:%M")) ]) - ], label='Advanced', id=advanced_collapse, is_open=False), - ]) + ]) + ] # history panel history_favorite_panel = dbc.Card([ - dbc.CardHeader('Prompts'), + dbc.CardHeader('Prompt History'), dbc.CardBody([], id=history_body), dcc.Store(id=history_store, data=[]) ], class_name='h-100') @@ -108,63 +117,88 @@ def layout(): # query creator panel input_panel = dbc.Card([ - dbc.InputGroup([ - dbc.InputGroupText([], id=tags, class_name='flex-grow-1', style={'gap': '5px'}), + dbc.CardHeader('Prompt Input'), + # TODO figure out the proper way to create new tags/upload docs + # then remove the `class_name='d-none'` from this button. + dbc.Button(dcc.Upload([html.I(className='fas fa-plus me-1'), 'Upload'], accept='.pdf', id=attachment_upload), class_name='d-none'), + dbc.CardBody([ + dbc.Textarea(id=query_input, value=starting_prompt, class_name='h-100', style={'minHeight': '150px'}), + html.Div([ + html.Span([ + 'Placeholders', + html.I(className='fas fa-circle-question ms-1', id=placeholder_tooltip) + ], className='me-1'), + html.Span([], id=tags), + ], className='mt-1'), + dbc.Tooltip( + 'Click a placeholder to insert it into your prompt. Upon submission, it will be replaced with the corresponding value.', + target=placeholder_tooltip + ), dcc.Store(id=tag_store, data={'student_text': ''}), - dbc.Button(dcc.Upload([html.I(className='fas fa-plus me-1'), 'Upload'], accept='.pdf', id=attachment_upload)) ]), - dbc.CardBody(dbc.Textarea(id=query_input, value=starting_prompt, class_name='h-100', style={'minHeight': '150px'})), dbc.CardFooter([ html.Small(id=submit_warning_message, className='text-danger'), dbc.Button('Submit', color='primary', id=submit, n_clicks=0, class_name='float-end') ]) - ], class_name='h-100') + ]) alert_component = dbc.Alert([ html.Div(id=alert_text), html.Div(DashRenderjson(id=alert_error_dump), className='' if DEBUG_FLAG else 'd-none') ], id=alert, color='danger', is_open=False) + loading_component = dbc.Collapse([ + html.Div(id=_loading_information), + dbc.Progress(id=_loading_progress, animated=True, striped=True, max=1.1) + ], id=_loading_collapse, is_open=False, class_name='mb-1 sticky-top bg-light') + # overall container cont = dbc.Container([ - html.H2('Prototype: Work in Progress'), - html.P( - 'This dashboard is a prototype allowing teachers to run ChatGPT over a set of essays. ' - 'The dashboard is subject to change based on ongoing feedback from teachers.' - ), - html.H2('AskGPT'), + html.H2('Writing Observer - AskGPT'), + dbc.InputGroup([ + dbc.InputGroupText(lodrc.LOConnectionAIO(aio_id=_websocket)), + dbc.Button([html.I(className='fas fa-cog me-1'), 'Advanced'], id=_advanced_toggle), + lodrc.ProfileSidebarAIO(class_name='rounded-0 rounded-end', color='secondary'), + ], class_name='mb-1'), + dbc.Collapse(advanced, id=_advanced_collapse, class_name='mb-1'), lodrc.LOPanelLayout( input_panel, panels=[ - {'children': history_favorite_panel, 'width': '20%', 'id': 'history-favorite', 'side': 'left'}, + {'children': history_favorite_panel, 'width': '30%', 'id': 'history-favorite'}, {'children': attachment_panel, 'width': '40%', 'id': 'attachment'}, ], shown=['history-favorite'], id=panel_layout ), - dbc.Row([advanced]), alert_component, - dbc.Row(id=grid, class_name='g-2 mt-2'), - lodrc.LOConnection(id=websocket), - dcc.Store(id=ws_store, data=[]), - dcc.Store(id=error_store, data=False) + html.H3('Student Text', className='mt-1'), + loading_component, + dbc.Row(id=grid, class_name='g-4'), ], fluid=True) return dcc.Loading(cont) # disbale document date/time options clientside_callback( - ClientsideFunction(namespace='clientside', function_name='disable_doc_src_datetime'), + ClientsideFunction(namespace='bulk_essay_feedback', function_name='disable_doc_src_datetime'), Output(doc_src_date, 'disabled'), Output(doc_src_timestamp, 'disabled'), Input(doc_src, 'value') ) +# Toggle if the advanced menu collapse is open or not +clientside_callback( + ClientsideFunction(namespace=_namespace, function_name='toggleAdvanced'), + Output(_advanced_collapse, 'is_open'), + Input(_advanced_toggle, 'n_clicks'), + State(_advanced_collapse, 'is_open') +) + # send request on websocket clientside_callback( ClientsideFunction(namespace='bulk_essay_feedback', function_name='send_to_loconnection'), - Output(websocket, 'send'), - Input(websocket, 'state'), # used for initial setup + Output(lodrc.LOConnectionAIO.ids.websocket(_websocket), 'send'), + Input(lodrc.LOConnectionAIO.ids.websocket(_websocket), 'state'), # used for initial setup Input('_pages_location', 'hash'), Input(submit, 'n_clicks'), Input(doc_src, 'value'), @@ -178,10 +212,11 @@ def layout(): # enable/disabled submit based on query # makes sure there is a query and the tags are properly formatted clientside_callback( - ClientsideFunction(namespace='bulk_essay_feedback', function_name='disabled_query_submit'), + ClientsideFunction(namespace='bulk_essay_feedback', function_name='disableQuerySubmitButton'), Output(submit, 'disabled'), Output(submit_warning_message, 'children'), Input(query_input, 'value'), + Input(_loading_collapse, 'is_open'), State(tag_store, 'data') ) @@ -215,28 +250,19 @@ def layout(): State(panel_layout, 'shown') ) -# store message from LOConnection in storage for later use -clientside_callback( - ClientsideFunction(namespace='bulk_essay_feedback', function_name='receive_ws_message'), - Output(ws_store, 'data'), - Output(error_store, 'data'), - Input(websocket, 'message'), - prevent_initial_call=True -) - clientside_callback( - ClientsideFunction(namespace='bulk_essay_feedback', function_name='update_alert_with_error'), + ClientsideFunction(namespace=_namespace, function_name='updateAlertWithError'), Output(alert_text, 'children'), Output(alert, 'is_open'), Output(alert_error_dump, 'data'), - Input(error_store, 'data') + Input(lodrc.LOConnectionAIO.ids.error_store(_websocket), 'data') ) # update student cards based on new data in storage clientside_callback( - ClientsideFunction(namespace='bulk_essay_feedback', function_name='update_student_grid'), + ClientsideFunction(namespace=_namespace, function_name='updateStudentGridOutput'), Output(grid, 'children'), - Input(ws_store, 'data'), + Input(lodrc.LOConnectionAIO.ids.ws_store(_websocket), 'data'), Input(history_store, 'data') ) @@ -278,3 +304,13 @@ def layout(): State(panel_layout, 'shown'), prevent_initial_call=True ) + +# update loading information +clientside_callback( + ClientsideFunction(namespace=_namespace, function_name='updateLoadingInformation'), + Output(_loading_collapse, 'is_open'), + Output(_loading_progress, 'value'), + Output(_loading_information, 'children'), + Input(lodrc.LOConnectionAIO.ids.ws_store(_websocket), 'data'), + Input(history_store, 'data') +) diff --git a/modules/wo_bulk_essay_analysis/wo_bulk_essay_analysis/gpt.py b/modules/wo_bulk_essay_analysis/wo_bulk_essay_analysis/gpt.py index 0f2ce6ee..baf06a6a 100644 --- a/modules/wo_bulk_essay_analysis/wo_bulk_essay_analysis/gpt.py +++ b/modules/wo_bulk_essay_analysis/wo_bulk_essay_analysis/gpt.py @@ -1,4 +1,5 @@ import aiohttp +import loremipsum import os import learning_observer.communication_protocol.integration @@ -71,9 +72,6 @@ async def chat_completion(self, prompt, system_prompt): class OllamaGPT(GPTAPI): '''GPT responder for handling request to the Ollama API - TODO this ought to just use requests instead of the specific ollama package - the format *should* be the same as the OpenAI responder. This will be one - less external module to rely on. ''' def __init__(self, **kwargs): ''' @@ -111,7 +109,7 @@ async def chat_completion(self, prompt, system_prompt): content = {'model': self.model, 'messages': messages, 'stream': False} async with aiohttp.ClientSession() as session: async with session.post(url, json=content) as resp: - json_resp = await resp.json(content_type=None) + json_resp = await resp.json() if resp.status == 200: return json_resp['message']['content'] error = 'Error occured while making Ollama request' @@ -120,9 +118,20 @@ async def chat_completion(self, prompt, system_prompt): raise GPTRequestErorr(error) +class StubGPT(GPTAPI): + '''GPT responder for handling stub requests + ''' + def __init__(self, **kwargs): + super().__init__() + + async def chat_completion(self, prompt, system_prompt): + return "\n".join(loremipsum.get_paragraphs(1)) + + GPT_RESPONDERS = { 'openai': OpenAIGPT, - 'ollama': OllamaGPT + 'ollama': OllamaGPT, + 'stub': StubGPT } @@ -170,7 +179,6 @@ async def process_student_essay(text, prompt, system_prompt, tags): We use a closure to allow the system to connect to the memoization KVS. ''' - copy_tags = tags.copy() @learning_observer.cache.async_memoization() @@ -203,7 +211,7 @@ async def gpt(gpt_prompt): async def test_responder(): - responder = OllamaGPT('llama2') + responder = OllamaGPT() response = await responder.chat_completion('Why is the sky blue?', 'You are a helper agent, please help fulfill user requests.') print('Response:', response) diff --git a/modules/wo_bulk_essay_analysis/wo_bulk_essay_analysis/module.py b/modules/wo_bulk_essay_analysis/wo_bulk_essay_analysis/module.py index 67ef2cef..d4c9f951 100644 --- a/modules/wo_bulk_essay_analysis/wo_bulk_essay_analysis/module.py +++ b/modules/wo_bulk_essay_analysis/wo_bulk_essay_analysis/module.py @@ -31,8 +31,14 @@ EXECUTION_DAG = { 'execution_dag': { - 'gpt_map': q.map(gpt_bulk_essay, values=q.variable('writing_observer.docs'), value_path='text', func_kwargs={'prompt': q.parameter('gpt_prompt'), 'system_prompt': q.parameter('system_prompt'), 'tags': q.parameter('tags', required=False, default={})}), - 'gpt_bulk': q.join(LEFT=q.variable('gpt_map'), LEFT_ON='provenance.value.provenance.provenance.STUDENT.value.user_id', RIGHT=q.variable('writing_observer.roster'), RIGHT_ON='user_id') + 'gpt_map': q.map( + gpt_bulk_essay, + values=q.variable('writing_observer.docs'), + value_path='text', + func_kwargs={'prompt': q.parameter('gpt_prompt'), 'system_prompt': q.parameter('system_prompt'), 'tags': q.parameter('tags', required=False, default={})}, + parallel=True + ), + 'gpt_bulk': q.join(LEFT=q.variable('gpt_map'), LEFT_ON='provenance.provenance.provenance.STUDENT.value.user_id', RIGHT=q.variable('writing_observer.roster'), RIGHT_ON='user_id') }, 'exports': { 'gpt_bulk': { @@ -69,6 +75,6 @@ 'url': "/wo_bulk_essay_analysis/dash/bulk-essay-analysis", "icon": { "type": "fas", - "icon": "fa-pen-nib" + "icon": "fa-lightbulb" } }] diff --git a/modules/wo_classroom_text_highlighter/MANIFEST.in b/modules/wo_classroom_text_highlighter/MANIFEST.in new file mode 100644 index 00000000..d1e41f77 --- /dev/null +++ b/modules/wo_classroom_text_highlighter/MANIFEST.in @@ -0,0 +1 @@ +include wo_classroom_text_highlighter/assets/* diff --git a/modules/wo_classroom_text_highlighter/README.md b/modules/wo_classroom_text_highlighter/README.md new file mode 100644 index 00000000..b1f7f2ed --- /dev/null +++ b/modules/wo_classroom_text_highlighter/README.md @@ -0,0 +1,5 @@ +# WO Classroom Text Highlighter + +The Writing Observer Classroom Text Highlighter is dashboard for highlighting student text. +Teachers are preseted with a grid of their students and a document for each of them. +Then, the teachers can select different language metrics to highlight the text with. diff --git a/modules/wo_classroom_text_highlighter/setup.cfg b/modules/wo_classroom_text_highlighter/setup.cfg new file mode 100644 index 00000000..50964c01 --- /dev/null +++ b/modules/wo_classroom_text_highlighter/setup.cfg @@ -0,0 +1,10 @@ +[metadata] +name = Writing Observer Classroom Text Highlighter +description = Use this as a base template for creating new modules on the Learning Observer. + +[options] +packages = wo_classroom_text_highlighter + +[options.entry_points] +lo_modules = + wo_classroom_text_highlighter = wo_classroom_text_highlighter.module diff --git a/modules/wo_classroom_text_highlighter/setup.py b/modules/wo_classroom_text_highlighter/setup.py new file mode 100644 index 00000000..77bd0119 --- /dev/null +++ b/modules/wo_classroom_text_highlighter/setup.py @@ -0,0 +1,14 @@ +''' +Install script. Everything is handled in setup.cfg + +To set up locally for development, run `python setup.py develop`, in a +virtualenv, preferably. +''' +from setuptools import setup + +setup( + name="wo_classroom_text_highlighter", + package_data={ + 'wo_classroom_text_highlighter': ['assets/*'], + } +) diff --git a/modules/wo_classroom_text_highlighter/wo_classroom_text_highlighter/__init__.py b/modules/wo_classroom_text_highlighter/wo_classroom_text_highlighter/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/modules/wo_classroom_text_highlighter/wo_classroom_text_highlighter/assets/general.css b/modules/wo_classroom_text_highlighter/wo_classroom_text_highlighter/assets/general.css new file mode 100644 index 00000000..9138b723 --- /dev/null +++ b/modules/wo_classroom_text_highlighter/wo_classroom_text_highlighter/assets/general.css @@ -0,0 +1,24 @@ +.loading-circle { + border: 4px solid #e0e0e0; + border-top: 4px solid var(--bs-primary); + border-radius: 50%; + width: 24px; + height: 24px; + animation: spin 1s linear infinite; + margin: 0 auto; +} + +@keyframes spin { + 0% { + transform: rotate(0deg); + } + + 100% { + transform: rotate(360deg); + } +} + +.preset button:last-child { + border-top-left-radius: 0; + border-bottom-left-radius: 0; +} \ No newline at end of file diff --git a/modules/wo_classroom_text_highlighter/wo_classroom_text_highlighter/assets/scripts.js b/modules/wo_classroom_text_highlighter/wo_classroom_text_highlighter/assets/scripts.js new file mode 100644 index 00000000..6167cfc4 --- /dev/null +++ b/modules/wo_classroom_text_highlighter/wo_classroom_text_highlighter/assets/scripts.js @@ -0,0 +1,232 @@ +/** + * Javascript callbacks to be used with the LO Example dashboard + */ + +// Initialize the `dash_clientside` object if it doesn't exist +if (!window.dash_clientside) { + window.dash_clientside = {}; +} + +const DASH_HTML_COMPONENTS = 'dash_html_components'; +const LO_DASH_REACT_COMPONENTS = 'lo_dash_react_components'; + +// TODO this ought to move to a more common place +function createDashComponent (namespace, type, props) { + return { namespace, type, props }; +} + +function determineSelectedNLPOptionsList (options) { + return options.filter(option => + (option.types?.highlight?.value === true) || + (option.types?.metric?.value === true) + ).map(option => option.id); +} + +// TODO this ought to move to a more common place like liblo.js +async function hashObject (obj) { + const jsonString = JSON.stringify(obj); + const encoder = new TextEncoder(); + const data = encoder.encode(jsonString); + + // Check if crypto.subtle is available + if (crypto && crypto.subtle) { + try { + const hashBuffer = await crypto.subtle.digest('SHA-256', data); + const hashArray = Array.from(new Uint8Array(hashBuffer)); + const hashHex = hashArray.map(byte => byte.toString(16).padStart(2, '0')).join(''); + return hashHex; + } catch (error) { + console.warn('crypto.subtle.digest failed; falling back to simple hash.'); + } + } + + // Fallback to the simple hash if crypto.subtle is unavailable + return simpleHash(jsonString); +} + +function simpleHash (str) { + let hash = 0; + for (let i = 0; i < str.length; i++) { + const char = str.charCodeAt(i); + hash = (hash << 5) - hash + char; + hash |= 0; // Convert to 32-bit integer + } + return hash.toString(16); +} + +// TODO some of this will move to the communication protocol, but for now +// it lives here +// Currently the system only handles grabbing the first document available +// from the student and populates it under latest. We shouldn't hardcode +// anything like latest here and instead pull it from the communication protocol +function formatStudentData (student, selectedHighlights) { + // TODO this ought to come from the comm protocol + const document = Object.keys(student.documents)[0]; + + // TODO make sure the comm protocol is providing the doc id + const highlightBreakpoints = selectedHighlights.reduce((acc, option) => { + const offsets = student.documents[document][option.id]?.offsets || []; + if (offsets) { + const modifiedOffsets = offsets.map(offset => { + return { + id: '', + tooltip: option.label, + start: offset[0], + offset: offset[1], + style: { backgroundColor: option.types.highlight.color } + }; + }); + acc = acc.concat(modifiedOffsets); + } + return acc; + }, []); + // const availableDocuments = Object.keys(student.docs).map(id => ({ + // id, + // title: student.docs[id].title || id + // })); + // availableDocuments.push({ id: 'latest', title: 'Latest' }); + const availableDocuments = [{ id: 'latest', title: 'Latest' }] + // TODO currently we only populate the latest data of the student documents + // this is currently the muddiest part of the data flow and ought to be + // cleaned up. + return { + profile: student.documents[document].profile, + availableDocuments, + documents: { + latest: { + text: student.documents[document].text, + breakpoints: highlightBreakpoints, + optionHash: student.documents[document].option_hash + } + } + }; +} + +function styleStudentTile (width, height) { + return { width: `${(100 - width) / width}%`, height: `${height}px` }; +} + +window.dash_clientside.wo_classroom_text_highlighter = { + /** + * Send updated queries to the communication protocol. + * @param {object} wsReadyState LOConnection status object + * @param {string} urlHash query string from hash for determining course id + * @returns stringified json object that is sent to the communication protocl + */ + sendToLOConnection: async function (wsReadyState, urlHash, fullOptions) { + if (wsReadyState === undefined) { + return window.dash_clientside.no_update; + } + if (wsReadyState.readyState === 1) { + if (urlHash.length === 0) { return window.dash_clientside.no_update; } + const decodedParams = decode_string_dict(urlHash.slice(1)); + if (!decodedParams.course_id) { return window.dash_clientside.no_update; } + + const optionsHash = await hashObject(fullOptions); + const nlpOptions = determineSelectedNLPOptionsList(fullOptions); + decodedParams.nlp_options = nlpOptions; + decodedParams.option_hash = optionsHash; + const outgoingMessage = { + wo_classroom_text_highlighter_query: { + execution_dag: 'writing_observer', + // TODO add `doc_list` here when available + target_exports: ['docs_with_nlp_annotations'], + kwargs: decodedParams + } + }; + return JSON.stringify(outgoingMessage); + } + return window.dash_clientside.no_update; + }, + + toggleOptions: function (clicks, isOpen) { + if (!clicks) { + return window.dash_clientside.no_update; + } + return !isOpen; + }, + + adjustTileSize: function (width, height, studentIds) { + const total = studentIds.length; + return Array(total).fill(styleStudentTile(width, height)); + }, + + showHideHeader: function (show, ids) { + const total = ids.length; + return Array(total).fill(show ? 'd-none' : ''); + }, + + updateCurrentOptionHash: async function (options, ids) { + const optionHash = await hashObject(options); + const total = ids.length; + return Array(total).fill(optionHash); + }, + + /** + * Build the student UI components based on the stored websocket data + * @param {*} wsStorageData information stored in the websocket store + * @returns Dash object to be displayed on page + */ + populateOutput: async function (wsStorageData, options, width, height, showHeader) { + console.log('wsStorageData', wsStorageData); + if (!wsStorageData) { + return 'No students'; + } + let output = []; + + const selectedHighlights = options.filter(option => option.types?.highlight?.value); + // TODO do something with the selected metrics/progress bars/etc. + // currently due to a HACK with how we pass data to the `childComponent` + // we are only able to have a single child and we expect it to be the + // `WOAnnotatedText` component. + const selectedMetrics = options.filter(option => option.types?.metric?.value); + + const optionHash = await hashObject(options); + + for (const student in wsStorageData) { + const studentTile = createDashComponent( + LO_DASH_REACT_COMPONENTS, 'WOStudentTextTile', + { + showHeader, + style: styleStudentTile(width, height), + studentInfo: formatStudentData(wsStorageData[student], selectedHighlights), + // TODO the selectedDocument ought to remain the same upon updating the student object + // i.e. it should be pulled from the current client student state + selectedDocument: 'latest', + childComponent: createDashComponent(LO_DASH_REACT_COMPONENTS, 'WOAnnotatedText', {}), + id: { type: 'WOStudentTextTile', index: student }, + currentOptionHash: optionHash, + className: 'mb-2' + } + ); + output = output.concat(studentTile); + } + return output; + }, + + updateAlertWithError: function (error) { + if (Object.keys(error).length === 0) { + return ['', false, '']; + } + const text = 'Oops! Something went wrong ' + + "on our end. We've noted the " + + 'issue. Please try again later, or consider ' + + 'exploring a different dashboard for now. ' + + 'Thanks for your patience!'; + return [text, true, error]; + }, + + addPreset: function (clicks, name, options, store) { + if (!clicks) { return store; } + const copy = { ...store }; + copy[name] = options; + return copy; + }, + + applyPreset: function (clicks, data) { + const preset = window.dash_clientside.callback_context?.triggered_id.index ?? null; + const itemsClicked = clicks.some(item => item !== undefined); + if (!preset | !itemsClicked) { return window.dash_clientside.no_update; } + return data[preset]; + } +}; diff --git a/modules/wo_classroom_text_highlighter/wo_classroom_text_highlighter/dash_dashboard.py b/modules/wo_classroom_text_highlighter/wo_classroom_text_highlighter/dash_dashboard.py new file mode 100644 index 00000000..a2e51808 --- /dev/null +++ b/modules/wo_classroom_text_highlighter/wo_classroom_text_highlighter/dash_dashboard.py @@ -0,0 +1,160 @@ +''' +This file creates the layout and defines any callbacks +for the classroom highlight dashboard. +''' +from dash import html, dcc, clientside_callback, ClientsideFunction, Output, Input, State, ALL +import dash_bootstrap_components as dbc +import dash_renderjson +import lo_dash_react_components as lodrc + +import learning_observer.settings +import wo_classroom_text_highlighter.options +import wo_classroom_text_highlighter.preset_component + +DEBUG_FLAG = learning_observer.settings.RUN_MODE == learning_observer.settings.RUN_MODES.DEV + +_prefix = 'wo-classroom-text-highlighter' +_namespace = 'wo_classroom_text_highlighter' +_websocket = f'{_prefix}-websocket' +_output = f'{_prefix}-output' + +# Option components +_options_toggle = f'{_prefix}-options-toggle' +_options_collapse = f'{_prefix}-options-collapse' +# TODO abstract these into a more generic options component +_options_prefix = f'{_prefix}-options' +_options_width = f'{_options_prefix}-width' +_options_height = f'{_options_prefix}-height' +_options_hide_header = f'{_options_prefix}-hide-names' +_options_text_information = f'{_options_prefix}-text-information' + +options_component = [ + html.H4('View Options'), + dbc.Label('Students per row'), + dbc.Input(type='number', min=1, max=10, value=3, step=1, id=_options_width), + dbc.Label('Height of student tile'), + dcc.Slider(min=100, max=800, marks=None, value=500, id=_options_height), + dbc.Label('Student name headers'), + dbc.Switch(value=True, id=_options_hide_header, label='Show/Hide'), + html.H4('Highlight Options'), + wo_classroom_text_highlighter.preset_component.create_layout(), + lodrc.WOSettings(id=_options_text_information, options=wo_classroom_text_highlighter.options.OPTIONS, className='table table-striped align-middle') +] + +# Alert Component +_alert = f'{_prefix}-alert' +_alert_text = f'{_prefix}-alert-text' +_alert_error_dump = f'{_prefix}-alert-error-dump' + +alert_component = dbc.Alert([ + html.Div(id=_alert_text), + html.Div(dash_renderjson.DashRenderjson(id=_alert_error_dump), className='' if DEBUG_FLAG else 'd-none') +], id=_alert, color='danger', is_open=False) + + +def layout(): + ''' + Function to define the page's layout. + ''' + page_layout = html.Div([ + html.H1('Writing Observer - Classroom Text Highlighter'), + alert_component, + dbc.InputGroup([ + dbc.InputGroupText(lodrc.LOConnectionAIO(aio_id=_websocket)), + dbc.Button([ + html.I(className='fas fa-cog me-1'), + 'Highlight Options' + ], id=_options_toggle), + lodrc.ProfileSidebarAIO(class_name='rounded-0 rounded-end', color='secondary'), + ], class_name='sticky-top mb-1'), + dbc.Offcanvas(options_component, id=_options_collapse, scrollable=True, title='Settings'), + html.Div(id=_output, className='d-flex justify-content-between flex-wrap') + ]) + return page_layout + + +# Send the initial state based on the url hash to LO. +# If this is not included, nothing will be returned from +# the communication protocol. +clientside_callback( + ClientsideFunction(namespace=_namespace, function_name='sendToLOConnection'), + Output(lodrc.LOConnectionAIO.ids.websocket(_websocket), 'send'), + Input(lodrc.LOConnectionAIO.ids.websocket(_websocket), 'state'), # used for initial setup + Input('_pages_location', 'hash'), + Input(_options_text_information, 'options') +) + +# Build the UI based on what we've received from the +# communicaton protocol +clientside_callback( + ClientsideFunction(namespace=_namespace, function_name='populateOutput'), + Output(_output, 'children'), + Input(lodrc.LOConnectionAIO.ids.ws_store(_websocket), 'data'), + Input(_options_text_information, 'options'), + Input(_options_width, 'value'), + Input(_options_height, 'value'), + Input(_options_hide_header, 'value'), +) + +# Toggle if the options collapse is open or not +clientside_callback( + ClientsideFunction(namespace=_namespace, function_name='toggleOptions'), + Output(_options_collapse, 'is_open'), + Input(_options_toggle, 'n_clicks'), + State(_options_collapse, 'is_open') +) + +# Adjust student tile size +clientside_callback( + ClientsideFunction(namespace=_namespace, function_name='adjustTileSize'), + Output({'type': 'WOStudentTextTile', 'index': ALL}, 'style'), + Input(_options_width, 'value'), + Input(_options_height, 'value'), + State({'type': 'WOStudentTextTile', 'index': ALL}, 'id'), +) + +# Handle showing or hiding the student tile header +clientside_callback( + ClientsideFunction(namespace=_namespace, function_name='showHideHeader'), + Output({'type': 'WOStudentTextTile', 'index': ALL}, 'showHeader'), + Input(_options_hide_header, 'value'), + State({'type': 'WOStudentTextTile', 'index': ALL}, 'id'), +) + +# When options change, update the current option hash for all students. +# When the option hash is different from the students internal option hash +# a loading class is applied to each student tile. +clientside_callback( + ClientsideFunction(namespace=_namespace, function_name='updateCurrentOptionHash'), + Output({'type': 'WOStudentTextTile', 'index': ALL}, 'currentOptionHash'), + Input(_options_text_information, 'options'), + State({'type': 'WOStudentTextTile', 'index': ALL}, 'id'), +) + +# Update the alert component with any errors that come through +clientside_callback( + ClientsideFunction(namespace=_namespace, function_name='updateAlertWithError'), + Output(_alert_text, 'children'), + Output(_alert, 'is_open'), + Output(_alert_error_dump, 'data'), + Input(lodrc.LOConnectionAIO.ids.error_store(_websocket), 'data') +) + +# Save options as preset +clientside_callback( + ClientsideFunction(namespace=_namespace, function_name='addPreset'), + Output(wo_classroom_text_highlighter.preset_component._store, 'data'), + Input(wo_classroom_text_highlighter.preset_component._add_button, 'n_clicks'), + State(wo_classroom_text_highlighter.preset_component._add_input, 'value'), + State(_options_text_information, 'options'), + State(wo_classroom_text_highlighter.preset_component._store, 'data') +) + +# Apply clicked preset +clientside_callback( + ClientsideFunction(namespace=_namespace, function_name='applyPreset'), + Output(_options_text_information, 'options'), + Input({'type': wo_classroom_text_highlighter.preset_component._set_item, 'index': ALL}, 'n_clicks'), + State(wo_classroom_text_highlighter.preset_component._store, 'data'), + prevent_initial_call=True +) diff --git a/modules/wo_classroom_text_highlighter/wo_classroom_text_highlighter/module.py b/modules/wo_classroom_text_highlighter/wo_classroom_text_highlighter/module.py new file mode 100644 index 00000000..68b8af37 --- /dev/null +++ b/modules/wo_classroom_text_highlighter/wo_classroom_text_highlighter/module.py @@ -0,0 +1,56 @@ +''' +Writing Observer Classroom Text Highlighter + +Writing Observer dashboard for highlighting different attributes of text at the classroom level. +''' +import learning_observer.downloads as d +from learning_observer.dash_integration import thirdparty_url, static_url + +import wo_classroom_text_highlighter.dash_dashboard + +# Name for the module +NAME = 'Writing Observer - Classroom Text Highlighter' + +''' +Define pages created with Dash. +''' +DASH_PAGES = [ + { + 'MODULE': wo_classroom_text_highlighter.dash_dashboard, + 'LAYOUT': wo_classroom_text_highlighter.dash_dashboard.layout, + 'ASSETS': 'assets', + 'TITLE': 'Writing Observer Classroom Text Highlighter', + 'DESCRIPTION': 'Writing Observer dashboard for highlighting different attributes of text at the classroom level.', + 'SUBPATH': 'wo-classroom-text-highlighter', + 'CSS': [ + thirdparty_url("css/fontawesome_all.css") + ], + 'SCRIPTS': [ + static_url("liblo.js") + ] + } +] + +''' +Additional files we want included that come from a third part. +''' +THIRD_PARTY = { + "css/fontawesome_all.css": d.FONTAWESOME_CSS, + "webfonts/fa-solid-900.woff2": d.FONTAWESOME_WOFF2, + "webfonts/fa-solid-900.ttf": d.FONTAWESOME_TTF +} + +''' +The Course Dashboards are used to populate the modules +on the home screen. + +Note the icon uses Font Awesome v5 +''' +COURSE_DASHBOARDS = [{ + 'name': NAME, + 'url': "/wo_classroom_text_highlighter/dash/wo-classroom-text-highlighter", + "icon": { + "type": "fas", + "icon": "fa-highlighter" + } +}] diff --git a/modules/wo_classroom_text_highlighter/wo_classroom_text_highlighter/options.py b/modules/wo_classroom_text_highlighter/wo_classroom_text_highlighter/options.py new file mode 100644 index 00000000..6b29036f --- /dev/null +++ b/modules/wo_classroom_text_highlighter/wo_classroom_text_highlighter/options.py @@ -0,0 +1,70 @@ +import copy +import writing_observer.nlp_indicators + +parents = [] + +OPTIONS = [ + {'id': indicator['id'], 'types': {'highlight': {}, 'metric': {}}, 'label': indicator['name'], 'parent': ''} + for indicator in writing_observer.nlp_indicators.INDICATOR_JSONS +] + +# TODO currently each preset is the full list of options with specific +# values being set to true/including a color. We ought to just store +# the true values and their respective colors. +# Though if we keep the entire list in the preset, we can choose colors +# for non-true values before they are selected. + +# Set of colors to use for highlighting with presets +HIGHLIGHTING_COLORS = [ + "#FFD700", # Golden Yellow + "#87CEEB", # Sky Blue + "#98FB98", # Pale Green + "#FFB6C1", # Light Pink + "#F0E68C", # Khaki + "#FF69B4", # Hot Pink + "#AFEEEE", # Pale Turquoise + "#FFA07A", # Light Salmon + "#D8BFD8", # Thistle + "#ADD8E6", # Light Blue + "#FFDEAD", # Navajo White + "#FA8072", # Salmon + "#E6E6FA", # Lavender + "#FFE4E1", # Misty Rose + "#F5DEB3" # Wheat +] + +# TODO these are used for creating the common presets +PRESETS_TO_CREATE = { + 'Narrative': ['direct_speech_verbs', 'indirect_speech', 'character_trait_words', 'in_past_tense', 'social_awareness'], + 'Argumentative': ['statements_of_opinion', 'statements_of_fact', 'information_sources', 'attributions', 'citations'], + 'Parts of Speech': ['adjectives', 'adverbs', 'nouns', 'proper_nouns', 'verbs', 'prepositions', 'coordinating_conjunction', 'subordinating_conjunction', 'auxiliary_verb', 'pronoun'], + 'Sentence Structure': ['simple_sentences', 'simple_with_complex_predicates', 'simple_with_compound_predicates', 'simple_with_compound_complex_predicates', 'compound_sentences', 'complex_sentences', 'compound_complex_sentences'], + 'Organization': ['main_idea_sentences', 'supporting_idea_sentences', 'supporting_detail_sentences'], + 'Tone': ['positive_tone', 'negative_tone', 'emotion_words', 'opinion_words'], + 'Vocabulary': ['academic_language', 'informal_language', 'latinate_words', 'polysyllabic_words', 'low_frequency_words'] +} + +deselect_all = 'Deselect All' +PRESETS = {deselect_all: OPTIONS} + + +def add_preset_to_presets(key, value): + '''This function creates a copy of the options and + sets each of the items in `value` to True along with + a highlighted color. This is for creating presets + from the `PRESETS_TO_CREATE` object. + ''' + color_index = 0 + preset = copy.deepcopy(OPTIONS) + for option in preset: + if option['id'] in value: + option['types']['highlight']['value'] = True + option['types']['highlight']['color'] = HIGHLIGHTING_COLORS[color_index] + color_index += 1 + + PRESETS[key] = preset + + +# Add each preset to PRESETS +for k, v in PRESETS_TO_CREATE.items(): + add_preset_to_presets(k, v) diff --git a/modules/wo_classroom_text_highlighter/wo_classroom_text_highlighter/preset_component.py b/modules/wo_classroom_text_highlighter/wo_classroom_text_highlighter/preset_component.py new file mode 100644 index 00000000..dd9c86c6 --- /dev/null +++ b/modules/wo_classroom_text_highlighter/wo_classroom_text_highlighter/preset_component.py @@ -0,0 +1,94 @@ +'''This creates the input and clickable badges for different +presets the user wants displayed. +TODO create a react component that does this +''' +from dash import html, dcc, clientside_callback, callback, Output, Input, State, ALL, exceptions, Patch, ctx +import dash_bootstrap_components as dbc + +import wo_classroom_text_highlighter.options + +_prefix = 'option-preset' +_store = f'{_prefix}-store' +_add_input = f'{_prefix}-add-input' +_add_button = f'{_prefix}-add-button' +_tray = f'{_prefix}-tray' +_set_item = f'{_prefix}-set-item' +_remove_item = f'{_prefix}-remove-item' + + +def create_layout(): + add_preset = dbc.InputGroup([ + dbc.Input(id=_add_input, placeholder='Preset name', type='text', value=''), + dbc.Button([ + html.I(className='fas fa-plus me-1'), + 'Preset' + ], id=_add_button) + ], class_name='mb-1') + return html.Div([ + add_preset, + html.Div(id=_tray), + # TODO we ought to store the presets on the server instead of browser storage + dcc.Store(id=_store, data=wo_classroom_text_highlighter.options.PRESETS, storage_type='local') + ]) + + +# disabled add preset when name already exists +clientside_callback( + '''function (value, curr) { + if (value.length === 0) { return true; } + if (Object.keys(curr).includes(value)) { return true; } + return false; + }''', + Output(_add_button, 'disabled'), + Input(_add_input, 'value'), + State(_store, 'data') +) + +# clear input on add +clientside_callback( + '''function (clicks, curr) { + if (clicks) { return ''; } + return curr; + }''', + Output(_add_input, 'value'), + Input(_add_button, 'n_clicks'), + State(_add_input, 'value') +) + + +def create_tray_item(preset): + if preset == wo_classroom_text_highlighter.options.deselect_all: + return dbc.Button(preset, id={'type': _set_item, 'index': preset}, color='warning') + contents = dbc.ButtonGroup([ + dbc.Button(preset, id={'type': _set_item, 'index': preset}), + dcc.ConfirmDialogProvider( + dbc.Button(html.I(className='fas fa-trash fa-xs'), color='secondary'), + id={'type': _remove_item, 'index': preset}, + message=f'Are you sure you want to delete the `{preset}` preset?' + ) + ], class_name='preset') + return contents + + +@callback( + Output(_tray, 'children'), + Input(_store, 'modified_timestamp'), + State(_store, 'data') +) +def create_tray_items_from_store(ts, data): + if ts is None and data is None: + raise exceptions.PreventUpdate + return [html.Div(create_tray_item(preset), className='d-inline-block me-1 mb-1') for preset in reversed(data.keys())] + + +@callback( + Output(_store, 'data', allow_duplicate=True), + Input({'type': _remove_item, 'index': ALL}, 'submit_n_clicks'), + prevent_initial_call=True +) +def remove_item_from_store(clicks): + if not ctx.triggered_id or all(c is None for c in clicks): + raise exceptions.PreventUpdate + patched_store = Patch() + del patched_store[ctx.triggered_id['index']] + return patched_store diff --git a/modules/wo_common_student_errors/wo_common_student_errors/dashboard/layout.py b/modules/wo_common_student_errors/wo_common_student_errors/dashboard/layout.py index 5e1d6476..6572a595 100644 --- a/modules/wo_common_student_errors/wo_common_student_errors/dashboard/layout.py +++ b/modules/wo_common_student_errors/wo_common_student_errors/dashboard/layout.py @@ -3,6 +3,11 @@ This layout is pretty messy as we are constantly prototyping new ways of displaying information ''' +# TODO this module no longer works properly since switching +# the communication protocol to use an async generator. +error = f'The module WO Common student errors is not compatible with the communication protocol api.\n'\ + 'Please uninstall this module with `pip uninstall wo-common-student-errors`.' +raise RuntimeError(error) # package imports import dash_bootstrap_components as dbc from dash_renderjson import DashRenderjson diff --git a/modules/wo_document_list/wo_document_list/dashboard/layout.py b/modules/wo_document_list/wo_document_list/dashboard/layout.py index d273791a..a7744a22 100644 --- a/modules/wo_document_list/wo_document_list/dashboard/layout.py +++ b/modules/wo_document_list/wo_document_list/dashboard/layout.py @@ -1,6 +1,11 @@ ''' Define layout for per student list of documents ''' +# TODO this module no longer works properly since switching +# the communication protocol to use an async generator. +error = f'The module WO Document List is not compatible with the communication protocol api.\n'\ + 'Please uninstall this module with `pip uninstall wo-document-list`.' +raise RuntimeError(error) # package imports import dash_bootstrap_components as dbc import lo_dash_react_components as lodrc diff --git a/modules/wo_highlight_dashboard/wo_highlight_dashboard/dashboard/layout.py b/modules/wo_highlight_dashboard/wo_highlight_dashboard/dashboard/layout.py index c0323bcd..4d0dd7b8 100644 --- a/modules/wo_highlight_dashboard/wo_highlight_dashboard/dashboard/layout.py +++ b/modules/wo_highlight_dashboard/wo_highlight_dashboard/dashboard/layout.py @@ -1,6 +1,12 @@ ''' Define layout for student dashboard view ''' +# TODO this module no longer works properly since switching +# the communication protocol to use an async generator. +# Additionally, this module has been re-written as `wo_classroom_text_highlighter` +error = f'The module WO Highlight Dashboard is not compatible with the communication protocol api.\n'\ + 'Please uninstall this module with `pip uninstall wo-highlight-dashboard`.' +raise RuntimeError(error) # package imports import learning_observer.dash_wrapper as dash import dash_bootstrap_components as dbc diff --git a/modules/writing_observer/writing_observer/awe_nlp.py b/modules/writing_observer/writing_observer/awe_nlp.py index 5a5aa8da..ea8e71c0 100644 --- a/modules/writing_observer/writing_observer/awe_nlp.py +++ b/modules/writing_observer/writing_observer/awe_nlp.py @@ -44,9 +44,14 @@ def init_nlp(): except OSError as e: error_text = 'There was an issue loading `en_core_web_lg` from spacy. '\ '`awe_components` requires various models to operate properly. '\ - f'Run `{learning_observer.paths.PYTHON_EXECUTABLE} awe_components/setup/data.py` to install all '\ + f'Run `{learning_observer.paths.PYTHON_EXECUTABLE} -m awe_components.setup.data` to install all '\ 'of the necessary models.' - raise OSError(error_text) from e + + a = input('Spacy model `en_core_web_lg` not available. Would you like to download? (y/n)') + if a.strip().lower() not in ['y', 'yes']: + raise OSError(error_text) from e + import awe_components.setup.data + awe_components.setup.data.download_models() # Adding all of the components, since # each of them turns out to be implicated in @@ -337,7 +342,7 @@ async def process_writings_with_caching(writing_data, options=None, mode=RUN_MOD RUN_MODES.SERIAL: process_texts_serial } - for writing in writing_data: + async for writing in writing_data: text = writing.get('text', '') if len(text) == 0: continue @@ -349,20 +354,18 @@ async def process_writings_with_caching(writing_data, options=None, mode=RUN_MOD found_features, writing = await check_available_features_in_cache(cache, text_hash, requested_features, writing) # If all options were found if found_features == requested_features: - results.append(writing) + yield writing continue # Check if some options are a subset of running_features: features that are needed but are already running unfound_features, found_features, writing = await check_and_wait_for_running_features(writing, requested_features, found_features, cache, sleep_interval, wait_time_for_running_features, text_hash) # If all options are found if found_features == requested_features: - results.append(writing) + yield writing continue # Add not found options to running_features and update cache - results.append(await process_and_cache_missing_features(unfound_features, found_features, requested_features, cache, text_hash, writing)) - - return results + yield await process_and_cache_missing_features(unfound_features, found_features, requested_features, cache, text_hash, writing) if __name__ == '__main__': diff --git a/modules/writing_observer/writing_observer/document_timestamps.py b/modules/writing_observer/writing_observer/document_timestamps.py index 98d2aedf..b7c9f528 100644 --- a/modules/writing_observer/writing_observer/document_timestamps.py +++ b/modules/writing_observer/writing_observer/document_timestamps.py @@ -20,27 +20,27 @@ def select_source(sources, source): @learning_observer.communication_protocol.integration.publish_function('writing_observer.fetch_doc_at_timestamp') -def fetch_doc_at_timestamp(overall_timestamps, requested_timestamp=None): +async def fetch_doc_at_timestamp(overall_timestamps, requested_timestamp=None): ''' Iterate over a list of students and determine their latest document in reference to the `requested_timestamp`. `requested_timestamp` should be a string of ms since unix epoch ''' - output = [] - for student in overall_timestamps: + # output = [] + # TODO this should be an async gen + async for student in overall_timestamps: timestamps = student.get('timestamps', {}) student['doc_id'] = '' if requested_timestamp is None: # perhaps this should fetch the latest doc id instead - output.append(student) + yield student continue sorted_ts = sorted(timestamps.keys()) bisect_index = bisect.bisect_right(sorted_ts, requested_timestamp) - 1 if bisect_index < 0: - output.append(student) + yield student continue target_ts = sorted_ts[bisect_index] student['doc_id'] = timestamps[target_ts] - output.append(student) - return output + yield student diff --git a/modules/writing_observer/writing_observer/module.py b/modules/writing_observer/writing_observer/module.py index 095b6674..da1f03ac 100644 --- a/modules/writing_observer/writing_observer/module.py +++ b/modules/writing_observer/writing_observer/module.py @@ -49,7 +49,7 @@ name='nlp_source', type='nlp_source', description='Process the NLP components at time of execution '\ - 'dag `nlp` or read results from reducer `npl_sep_proc`.', + 'dag `nlp` or read results from reducer `nlp_sep_proc`.', default='nlp' ) pmss.parser('languagetool_source', parent='string', choices=['overall_lt', 'overall_lt_sep_proc'], transform=None) diff --git a/modules/writing_observer/writing_observer/stub_nlp.py b/modules/writing_observer/writing_observer/stub_nlp.py index 4357070a..bca9c352 100644 --- a/modules/writing_observer/writing_observer/stub_nlp.py +++ b/modules/writing_observer/writing_observer/stub_nlp.py @@ -128,8 +128,7 @@ async def process_texts(writing_data, options=None): ''' if options is None: options = writing_observer.nlp_indicators.INDICATORS.keys() - - for writing in writing_data: + async for writing in writing_data: text = writing.get('text', '') if len(text) == 0: continue @@ -158,8 +157,7 @@ async def process_texts(writing_data, options=None): 'offsets': select_random_segments(text, seed=id) }) writing.update(results) - - return writing_data + yield writing if __name__ == '__main__': diff --git a/modules/writing_observer/writing_observer/writing_analysis.py b/modules/writing_observer/writing_observer/writing_analysis.py index f052b789..c2cb9b33 100644 --- a/modules/writing_observer/writing_observer/writing_analysis.py +++ b/modules/writing_observer/writing_observer/writing_analysis.py @@ -13,6 +13,7 @@ import writing_observer.reconstruct_doc +import learning_observer.adapters import learning_observer.communication_protocol.integration from learning_observer.stream_analytics.helpers import student_event_reducer, kvs_pipeline, KeyField, EventField, Scope import learning_observer.settings @@ -409,3 +410,16 @@ def get_doc_id(event): doc_id = client.get('object', {}).get('id') return doc_id + +def document_link_to_doc_id(event): + ''' + Convert a document link to include a doc_id + ''' + doc_id = get_doc_id({'client': event}) + if doc_id and 'client' in event: + event['client']['doc_id'] = doc_id + elif doc_id: + event['doc_id'] = doc_id + return event + +learning_observer.adapters.adapter.add_common_migrator(document_link_to_doc_id, __file__) diff --git a/wo_requirements.txt b/wo_requirements.txt index 55a5cd61..54e26e12 100644 --- a/wo_requirements.txt +++ b/wo_requirements.txt @@ -1,4 +1,3 @@ writing_observer @ git+https://github.com/ETS-Next-Gen/writing_observer.git#subdirectory=modules/writing_observer/ -wo_highlight_dashboard @ git+https://github.com/ETS-Next-Gen/writing_observer.git#subdirectory=modules/wo_highlight_dashboard/ -wo_common_student_errors @ git+https://github.com/ETS-Next-Gen/writing_observer.git#subdirectory=modules/wo_common_student_errors/ wo_bulk_essay_analysis @ git+https://github.com/ETS-Next-Gen/writing_observer.git#subdirectory=modules/wo_bulk_essay_analysis/ +wo_classroom_text_highlighter @ git+https://github.com/ETS-Next-Gen/writing_observer.git#subdirectory=modules/wo_classroom_text_highlighter/