From 9fd98c2d846361307ec13fe143528313d9a58823 Mon Sep 17 00:00:00 2001 From: shaokaiye Date: Tue, 30 Jan 2024 14:08:57 +0100 Subject: [PATCH 1/3] handling bigger video upload and fix full empty frame during fill na --- Makefile | 2 +- amadeusgpt/implementation.py | 25 ++++++++++++++++++------- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index b0713fc..56f5a7b 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ export streamlit_app=True app: - streamlit run amadeusgpt/app.py --server.fileWatcherType none + streamlit run amadeusgpt/app.py --server.fileWatcherType none --server.maxUploadSize 1000 diff --git a/amadeusgpt/implementation.py b/amadeusgpt/implementation.py index a310d4e..c8573a0 100644 --- a/amadeusgpt/implementation.py +++ b/amadeusgpt/implementation.py @@ -1914,7 +1914,16 @@ def reject_outlier_keypoints(cls, keypoints, threshold_in_stds=2): return temp @classmethod - def ast_fillna_2d(cls, arr): + def ast_fillna_2d(cls, arr: np.ndarray) -> np.ndarray: + """ + Fills NaN values in a 4D keypoints array using linear interpolation. + + Parameters: + arr (np.ndarray): A 4D numpy array of shape (n_frames, n_individuals, n_kpts, n_dims). + + Returns: + np.ndarray: The 4D array with NaN values filled. + """ n_frames, n_individuals, n_kpts, n_dims = arr.shape arr_reshaped = arr.reshape(n_frames, -1) x = np.arange(n_frames) @@ -1922,13 +1931,15 @@ def ast_fillna_2d(cls, arr): valid_mask = ~np.isnan(arr_reshaped[:, i]) if np.all(valid_mask): continue - arr_reshaped[:, i] = np.interp( - x, x[valid_mask], arr_reshaped[valid_mask, i] - ) - # Reshape the array back to 4D - arr = arr_reshaped.reshape(n_frames, n_individuals, n_kpts, n_dims) + elif np.any(valid_mask): + # Perform interpolation when there are some valid points + arr_reshaped[:, i] = np.interp(x, x[valid_mask], arr_reshaped[valid_mask, i]) + else: + # Handle the case where all values are NaN + # Replace with a default value or another suitable handling + arr_reshaped[:, i].fill(0) # Example: filling with 0 - return arr + return arr_reshaped.reshape(n_frames, n_individuals, n_kpts, n_dims) @classmethod @timer_decorator From 6552f511159fe3f23581a72c66b9d8e0733bcaea Mon Sep 17 00:00:00 2001 From: shaokaiye Date: Mon, 12 Feb 2024 11:25:40 +0100 Subject: [PATCH 2/3] removed the disk caches as they are problematic --- amadeusgpt/app_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/amadeusgpt/app_utils.py b/amadeusgpt/app_utils.py index dd4d932..7c5b323 100644 --- a/amadeusgpt/app_utils.py +++ b/amadeusgpt/app_utils.py @@ -581,7 +581,6 @@ def update_df_data(new_item, index_to_update, df, csv_file): return csv_file, df -@st.cache_data(persist="disk") def get_scene_image(example): if AnimalBehaviorAnalysis.get_video_file_path() is not None: scene_image = Scene.get_scene_frame() From ac9bbbfc84afa74b2f55d2d280e6f76e3d6e9d0c Mon Sep 17 00:00:00 2001 From: shaokaiye Date: Wed, 14 Feb 2024 14:02:06 +0100 Subject: [PATCH 3/3] Added more comments --- amadeusgpt/app_utils.py | 1 - amadeusgpt/implementation.py | 13 ++++++++++- amadeusgpt/main.py | 42 ++++++++++++++++++++++++------------ 3 files changed, 40 insertions(+), 16 deletions(-) diff --git a/amadeusgpt/app_utils.py b/amadeusgpt/app_utils.py index 7c5b323..d742b12 100644 --- a/amadeusgpt/app_utils.py +++ b/amadeusgpt/app_utils.py @@ -598,7 +598,6 @@ def get_scene_image(example): return get_scene_image(example) -@st.cache_data(persist="disk") def get_sam_image(example): if AnimalBehaviorAnalysis.get_video_file_path(): seg_objects = AnimalBehaviorAnalysis.get_seg_objects() diff --git a/amadeusgpt/implementation.py b/amadeusgpt/implementation.py index c8573a0..7e5397f 100644 --- a/amadeusgpt/implementation.py +++ b/amadeusgpt/implementation.py @@ -60,6 +60,7 @@ scene_frame_number = 0 + class Database: """ A singleton that stores all data. Should be easy to integrate with a Nonsql database @@ -1218,9 +1219,19 @@ def get_objects(self, video_file_path): else: return self.pickledata + + class AnimalBehaviorAnalysis: + """ + This class holds methods and objects that are useful for analyzing animal behavior. + It no longer holds the states of objects directly. Instead, it references to the Database + singleton object. This is to make the class more stateless and easier to use in a web app. + """ + + # to be deprecated task_programs = {} + # to be deprecated task_program_results = {} # if a function has a parameter, it assumes the result_buffer has it # special dataset flags set to be False @@ -1251,7 +1262,7 @@ def result_buffer(cls): @classmethod def release_cache_objects(cls): """ - For web app, switching from one example to the other requires a release of cached objects + For web app, switching from one example to the another requires a release of cached objects """ if Database.exist(cls.__name__, "animal_objects"): Database.delete(cls.__name__, "animal_objects") diff --git a/amadeusgpt/main.py b/amadeusgpt/main.py index d469eef..0457108 100644 --- a/amadeusgpt/main.py +++ b/amadeusgpt/main.py @@ -100,9 +100,11 @@ class AMADEUS: code_generator_brain.enforce_prompt = "" usage = 0 behavior_modules_in_context = True - # to save the behavior module strings for context window use + # load the integration modules to context smart_loading = True + # number of topk integration modules to load load_module_top_k = 3 + module_threshold = 0.7 context_window_dict = {} plot = False use_rephraser = True @@ -124,6 +126,7 @@ def release_cache_objects(cls): @classmethod def load_module_smartly(cls, user_input): + # TODO: need to improve the module matching by vector database sorted_query_results = match_module(user_input) if len(sorted_query_results) == 0: return None @@ -134,7 +137,7 @@ def load_module_smartly(cls, user_input): query_module = query_result[0] query_score = query_result[1][0][0] - if query_score > 0.7: + if query_score > cls.module_threshold: modules.append(query_module) # parse the query result by loading active loading module_path = os.sep.join(query_module.split(os.sep)[-2:]).replace( @@ -158,7 +161,7 @@ def magic_command(cls, user_input): AmadeusLogger.info(result.stdout.decode("utf-8")) @classmethod - def save_state(cls): + def save_state(cls, output_path = 'soul.pickle'): # save the class attributes of all classes that are under state_list. def get_class_variables(_class): return { @@ -171,15 +174,14 @@ def get_class_variables(_class): state = {k.__name__: get_class_variables(k) for k in cls.state_list} - output_filename = "soul.pickle" - with open(output_filename, "wb") as f: + with open(output_path, "wb") as f: pickle.dump(state, f) - AmadeusLogger.info(f"memory saved to {output_filename}") + AmadeusLogger.info(f"memory saved to {output_path}") @classmethod - def load_state(cls): + def load_state(cls, ckpt_path = 'soul.pickle'): # load the class variables into 3 class - memory_filename = "soul.pickle" + memory_filename = ckpt_path AmadeusLogger.info(f"loading memory from {memory_filename}") with open(memory_filename, "rb") as f: state = pickle.load(f) @@ -296,6 +298,7 @@ def chat( cls.interface_str, cls.behavior_modules_str ) cls.code_generator_brain.update_history("user", rephrased_user_msg) + response = cls.code_generator_brain.connect_gpt( cls.code_generator_brain.context_window, max_tokens=700, functions=functions ) @@ -307,10 +310,12 @@ def chat( thought_process, ) = cls.code_generator_brain.parse_openai_response(response) + # write down the task program for offline processing with open("temp_for_debug.json", "w") as f: out = {'function_code': function_code, 'query': rephrased_user_msg} json.dump(out, f, indent=4) + # handle_function_codes gives the answer with function outputs amadeus_answer = cls.core_loop( rephrased_user_msg, text, function_code, thought_process @@ -321,16 +326,19 @@ def chat( original_user_msg, amadeus_answer.function_code, code_output ) - # if there is an error or the function code is empty, we want to make sure we prevent ChatGPT to learn to output nothing from few-shot learning - # is this used anymore? + + # Could be used for in context feedback learning. Costly if amadeus_answer.has_error: cls.code_generator_brain.context_window[-1][ "content" ] += "\n While executing the code above, there was error so it is not correct answer\n" - elif amadeus_answer.has_error: - cls.code_generator_brain.context_window.pop() - cls.code_generator_brain.history.pop() + + # if there is an error or the function code is empty, we want to make sure we prevent ChatGPT to learn to output nothing from few-shot learning + #elif amadeus_answer.has_error: + # cls.code_generator_brain.context_window.pop() + # cls.code_generator_brain.history.pop() + else: # needs to manage memory of Amadeus for context window management and state restore etc. # we have it remember user's original question instead of the rephrased one for better @@ -351,10 +359,13 @@ def execute_python_function( exec(function_code, globals()) if "task_program" not in globals(): return None + + # TODO: to serialize and support different function arguments func_sigs = inspect.signature(task_program) if not func_sigs.parameters: result = task_program() else: + # TODO: We don't do this anymore. But in the future, Is passing result buffer from each function sustainable? result_buffer = AnimalBehaviorAnalysis.result_buffer AmadeusLogger.info(f"result_buffer: {result_buffer}") if isinstance(result_buffer, tuple): @@ -368,9 +379,11 @@ def execute_python_function( @classmethod def contribute(cls, program_name): """ + Deprecated Takes the program from the task program registry and write it into contribution folder TODO: split the task program into implementation and api - """ + """ + AmadeusLogger.info(f"contributing {program_name}") task_program = AnimalBehaviorAnalysis.task_programs[program_name] # removing add_symbol or add_task_program line @@ -390,6 +403,7 @@ def update_behavior_modules_str(cls): Called during loading behavior modules from disk or when task program is updated """ modules_str = [] + # context_window_dict is where integration modules are stored in current AMADEUS class for name, task_program in cls.context_window_dict.items(): modules_str.append(task_program) modules_str = modules_str[-cls.load_module_top_k :]