Drop the output_assembled option from BaseInput.to_excel/to_csv, update the
affected spreadsheet tests, and raise the minimum dependency versions.

Review notes on the added lines:
  * to_excel walks rows with enumerate(itertuples(...)) so the worksheet row is
    positional rather than taken from the dataframe index labels, and each
    cell value is used directly instead of being looked up a second time.
  * to_csv passes sep by keyword; pandas is phasing out positional arguments
    after path_or_buf.
  * The base_input.py blob id on its "index" line predates these two tweaks.

diff --git a/docs/requirements.txt b/docs/requirements.txt
index ce85e15ce..94b716c1a 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,9 +1,10 @@
 defusedxml>=0.7.1
-inflect>=5.5.1
+inflect>=6.0.2
 myst-parser>=0.18.1
-openpyxl>=3.0.9
+numpy>=1.21.6
+openpyxl>=3.1.0
 pandas>=1.3.5
-portalocker>=2.4.0
-semantic_version>=2.9.0
+portalocker>=2.7.0
+semantic_version>=2.10.0
 Sphinx>=5.2.2
 sphinx_rtd_theme>=1.0.0
diff --git a/hed/models/base_input.py b/hed/models/base_input.py
index 2e9ae5adc..41c935687 100644
--- a/hed/models/base_input.py
+++ b/hed/models/base_input.py
@@ -193,12 +193,11 @@ def expand_defs(self, hed_schema, def_dict):
         from df_util import expand_defs
         expand_defs(self._dataframe, hed_schema=hed_schema, def_dict=def_dict, columns=self._mapper.get_tag_columns())
 
-    def to_excel(self, file, output_assembled=False):
+    def to_excel(self, file):
         """ Output to an Excel file.
 
         Parameters:
             file (str or file-like): Location to save this base input.
-            output_assembled (bool): Plug in categories and values from the sidecar directly.
         Raises:
             ValueError: if empty file object or file cannot be opened.
         """
@@ -206,52 +205,33 @@ def to_excel(self, file, output_assembled=False):
             raise ValueError("Empty file name or object passed in to BaseInput.save.")
 
         dataframe = self._dataframe
-        old_columns = dataframe.columns
-
-        if output_assembled:
-            dataframe = self.dataframe_a
-            new_columns = dataframe.columns
-        else:
-            new_columns = old_columns
-
         if self._loaded_workbook:
-            column_mapping = {}  # assembled dataframe column number to original worksheet number
-            for new_c, column in enumerate(new_columns):
-                for old_c, old_column in enumerate(old_columns):
-                    if column == old_column:
-                        column_mapping[new_c] = old_c
-
             old_worksheet = self.get_worksheet(self._worksheet_name)
             # Excel spreadsheets are 1 based, then add another 1 for column names if present
             adj_row_for_col_names = 1
             if self._has_column_names:
                 adj_row_for_col_names += 1
             adj_for_one_based_cols = 1
-            for row_number in range(len(dataframe)):
-                for df_column_number, ws_column_number in column_mapping.items():
-                    cell_value = dataframe.iat[row_number, df_column_number]
-
+            # Count rows by position: index labels are not guaranteed to be 0..n-1.
+            for row_number, row_values in enumerate(dataframe.itertuples(index=False, name=None)):
+                for column_number, cell_value in enumerate(row_values):
                     old_worksheet.cell(row_number + adj_row_for_col_names,
-                                       ws_column_number + adj_for_one_based_cols).value = cell_value
+                                       column_number + adj_for_one_based_cols).value = cell_value
+
             self._loaded_workbook.save(file)
         else:
             dataframe.to_excel(file, header=self._has_column_names)
 
-    def to_csv(self, file=None, output_assembled=False):
+    def to_csv(self, file=None):
         """ Write to file or return as a string.
 
         Parameters:
             file (str, file-like, or None): Location to save this file. If None, return as string.
-            output_assembled (bool): Plug in categories and values from the sidecar directly.
         Returns:
             None or str: None if file is given or the contents as a str if file is None.
 
         """
         dataframe = self._dataframe
-
-        if output_assembled:
-            dataframe = self.dataframe_a
-
-        csv_string_if_filename_none = dataframe.to_csv(file, '\t', index=False, header=self._has_column_names)
+        csv_string_if_filename_none = dataframe.to_csv(file, sep='\t', index=False, header=self._has_column_names)
         return csv_string_if_filename_none
 
diff --git a/requirements.txt b/requirements.txt
index 02309238a..443e763d2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,7 @@
 defusedxml>=0.7.1
-inflect>=5.5.1
-openpyxl>=3.0.9
+inflect>=6.0.2
+numpy>=1.21.6
+openpyxl>=3.1.0
 pandas>=1.3.5
-portalocker>=2.4.0
-semantic_version>=2.9.0
+portalocker>=2.7.0
+semantic_version>=2.10.0
diff --git a/tests/models/test_spreadsheet_input.py b/tests/models/test_spreadsheet_input.py
index 7d3f590d4..b009ef419 100644
--- a/tests/models/test_spreadsheet_input.py
+++ b/tests/models/test_spreadsheet_input.py
@@ -32,7 +32,7 @@ def test_all(self):
         hed_input = self.default_test_file_name
         has_column_names = True
         column_prefix_dictionary = {1: 'Label/', 2: 'Description'}
-        tag_columns = [3]
+        tag_columns = [4]
         worksheet_name = 'LKT Events'
 
         file_input = SpreadsheetInput(hed_input, has_column_names=has_column_names, worksheet_name=worksheet_name,
@@ -50,7 +50,7 @@ def test_all2(self):
         hed_input = self.default_test_file_name
         has_column_names = True
         column_prefix_dictionary = {1: 'Label/', "Short label": 'Description'}
-        tag_columns = [3]
+        tag_columns = [4]
         worksheet_name = 'LKT Events'
 
         file_input = SpreadsheetInput(hed_input, has_column_names=has_column_names, worksheet_name=worksheet_name,
@@ -113,7 +113,7 @@ def test_to_excel_should_work(self):
                                        column_prefix_dictionary={1: 'Label/', 2: 'Description/'},
                                        name='ExcelOneSheet.xlsx')
         buffer = io.BytesIO()
-        spreadsheet.to_excel(buffer, output_assembled=True)
+        spreadsheet.to_excel(buffer)
         buffer.seek(0)
         v = buffer.getvalue()
         self.assertGreater(len(v), 0, "It should have a length greater than 0")
@@ -214,49 +214,21 @@ def test_to_excel_workbook(self):
                                       tag_columns=["HED tags"])
         test_output_name = self.base_output_folder + "ExcelMultipleSheets_resave_assembled.xlsx"
         excel_book.convert_to_long(self.hed_schema)
-        excel_book.to_excel(test_output_name, True)
+        excel_book.to_excel(test_output_name)
         reloaded_df = SpreadsheetInput(test_output_name, worksheet_name="LKT 8HED3")
 
         self.assertTrue(excel_book.dataframe.equals(reloaded_df.dataframe))
 
-        excel_book = SpreadsheetInput(self.default_test_file_name, worksheet_name="LKT 8HED3",
-                                      tag_columns=["HED tags"],
-                                      column_prefix_dictionary={
-                                          "Short label": "Label/",
-                                          "Description in text": "Description"
-                                      })
-        test_output_name = self.base_output_folder + "ExcelMultipleSheets_resave_assembled_prefix.xlsx"
-        excel_book.convert_to_long(self.hed_schema)
-        excel_book.to_excel(test_output_name, True)
-        reloaded_df = SpreadsheetInput(test_output_name, worksheet_name="LKT 8HED3",
-                                       tag_columns=["Short label", "Description in text", "HED tags"])
-
-        self.assertTrue(excel_book.dataframe_a.equals(reloaded_df.dataframe_a))
-
     def test_to_excel_workbook_no_col_names(self):
         excel_book = SpreadsheetInput(self.default_test_file_name, worksheet_name="LKT 8HED3",
                                       tag_columns=[4], has_column_names=False)
         test_output_name = self.base_output_folder + "ExcelMultipleSheets_resave_assembled_no_col_names.xlsx"
         excel_book.convert_to_long(self.hed_schema)
-        excel_book.to_excel(test_output_name, True)
+        excel_book.to_excel(test_output_name)
         reloaded_df = SpreadsheetInput(test_output_name, worksheet_name="LKT 8HED3", tag_columns=[4],
                                        has_column_names=False)
         self.assertTrue(excel_book.dataframe.equals(reloaded_df.dataframe))
 
-        excel_book = SpreadsheetInput(self.default_test_file_name, worksheet_name="LKT 8HED3", has_column_names=False,
-                                      tag_columns=[4],
-                                      column_prefix_dictionary={
-                                          1: "Label/",
-                                          3: "Description"
-                                      })
-        test_output_name = self.base_output_folder + "ExcelMultipleSheets_resave_assembled_prefix.xlsx"
-        excel_book.convert_to_long(self.hed_schema)
-        excel_book.to_excel(test_output_name, True)
-        reloaded_df = SpreadsheetInput(test_output_name, worksheet_name="LKT 8HED3", tag_columns=[1, 3, 4],
-                                       has_column_names=False)
-
-        self.assertTrue(excel_book.dataframe_a.equals(reloaded_df.dataframe_a))
-
 if __name__ == '__main__':
     unittest.main()