Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions docs/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
defusedxml>=0.7.1
inflect>=5.5.1
inflect>=6.0.2
myst-parser>=0.18.1
openpyxl>=3.0.9
numpy>=1.21.6
openpyxl>=3.1.0
pandas>=1.3.5
portalocker>=2.4.0
semantic_version>=2.9.0
portalocker>=2.7.0
semantic_version>=2.10.0
Sphinx>=5.2.2
sphinx_rtd_theme>=1.0.0
34 changes: 7 additions & 27 deletions hed/models/base_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,65 +193,45 @@ def expand_defs(self, hed_schema, def_dict):
from df_util import expand_defs
expand_defs(self._dataframe, hed_schema=hed_schema, def_dict=def_dict, columns=self._mapper.get_tag_columns())

def to_excel(self, file, output_assembled=False):
def to_excel(self, file):
""" Output to an Excel file.

Parameters:
file (str or file-like): Location to save this base input.
output_assembled (bool): Plug in categories and values from the sidecar directly.
Raises:
ValueError: if empty file object or file cannot be opened.
"""
if not file:
raise ValueError("Empty file name or object passed in to BaseInput.save.")

dataframe = self._dataframe
old_columns = dataframe.columns

if output_assembled:
dataframe = self.dataframe_a
new_columns = dataframe.columns
else:
new_columns = old_columns

if self._loaded_workbook:
column_mapping = {} # assembled dataframe column number to original worksheet number
for new_c, column in enumerate(new_columns):
for old_c, old_column in enumerate(old_columns):
if column == old_column:
column_mapping[new_c] = old_c

old_worksheet = self.get_worksheet(self._worksheet_name)
# Excel spreadsheets are 1 based, then add another 1 for column names if present
adj_row_for_col_names = 1
if self._has_column_names:
adj_row_for_col_names += 1
adj_for_one_based_cols = 1
for row_number in range(len(dataframe)):
for df_column_number, ws_column_number in column_mapping.items():
cell_value = dataframe.iat[row_number, df_column_number]

for row_number, text_file_row in dataframe.iterrows():
for column_number, column_text in enumerate(text_file_row):
cell_value = dataframe.iloc[row_number, column_number]
old_worksheet.cell(row_number + adj_row_for_col_names,
ws_column_number + adj_for_one_based_cols).value = cell_value
column_number + adj_for_one_based_cols).value = cell_value

self._loaded_workbook.save(file)
else:
dataframe.to_excel(file, header=self._has_column_names)

def to_csv(self, file=None, output_assembled=False):
def to_csv(self, file=None):
""" Write to file or return as a string.

Parameters:
file (str, file-like, or None): Location to save this file. If None, return as string.
output_assembled (bool): Plug in categories and values from the sidecar directly.
Returns:
None or str: None if file is given or the contents as a str if file is None.

"""
dataframe = self._dataframe

if output_assembled:
dataframe = self.dataframe_a

csv_string_if_filename_none = dataframe.to_csv(file, '\t', index=False, header=self._has_column_names)
return csv_string_if_filename_none

Expand Down
9 changes: 5 additions & 4 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
defusedxml>=0.7.1
inflect>=5.5.1
openpyxl>=3.0.9
inflect>=6.0.2
numpy>=1.21.6
openpyxl>=3.1.0
pandas>=1.3.5
portalocker>=2.4.0
semantic_version>=2.9.0
portalocker>=2.7.0
semantic_version>=2.10.0
38 changes: 5 additions & 33 deletions tests/models/test_spreadsheet_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def test_all(self):
hed_input = self.default_test_file_name
has_column_names = True
column_prefix_dictionary = {1: 'Label/', 2: 'Description'}
tag_columns = [3]
tag_columns = [4]
worksheet_name = 'LKT Events'

file_input = SpreadsheetInput(hed_input, has_column_names=has_column_names, worksheet_name=worksheet_name,
Expand All @@ -50,7 +50,7 @@ def test_all2(self):
hed_input = self.default_test_file_name
has_column_names = True
column_prefix_dictionary = {1: 'Label/', "Short label": 'Description'}
tag_columns = [3]
tag_columns = [4]
worksheet_name = 'LKT Events'

file_input = SpreadsheetInput(hed_input, has_column_names=has_column_names, worksheet_name=worksheet_name,
Expand Down Expand Up @@ -113,7 +113,7 @@ def test_to_excel_should_work(self):
column_prefix_dictionary={1: 'Label/', 2: 'Description/'},
name='ExcelOneSheet.xlsx')
buffer = io.BytesIO()
spreadsheet.to_excel(buffer, output_assembled=True)
spreadsheet.to_excel(buffer)
buffer.seek(0)
v = buffer.getvalue()
self.assertGreater(len(v), 0, "It should have a length greater than 0")
Expand Down Expand Up @@ -214,49 +214,21 @@ def test_to_excel_workbook(self):
tag_columns=["HED tags"])
test_output_name = self.base_output_folder + "ExcelMultipleSheets_resave_assembled.xlsx"
excel_book.convert_to_long(self.hed_schema)
excel_book.to_excel(test_output_name, True)
excel_book.to_excel(test_output_name)
reloaded_df = SpreadsheetInput(test_output_name, worksheet_name="LKT 8HED3")

self.assertTrue(excel_book.dataframe.equals(reloaded_df.dataframe))

excel_book = SpreadsheetInput(self.default_test_file_name, worksheet_name="LKT 8HED3",
tag_columns=["HED tags"],
column_prefix_dictionary={
"Short label": "Label/",
"Description in text": "Description"
})
test_output_name = self.base_output_folder + "ExcelMultipleSheets_resave_assembled_prefix.xlsx"
excel_book.convert_to_long(self.hed_schema)
excel_book.to_excel(test_output_name, True)
reloaded_df = SpreadsheetInput(test_output_name, worksheet_name="LKT 8HED3",
tag_columns=["Short label", "Description in text", "HED tags"])

self.assertTrue(excel_book.dataframe_a.equals(reloaded_df.dataframe_a))

def test_to_excel_workbook_no_col_names(self):
excel_book = SpreadsheetInput(self.default_test_file_name, worksheet_name="LKT 8HED3",
tag_columns=[4], has_column_names=False)
test_output_name = self.base_output_folder + "ExcelMultipleSheets_resave_assembled_no_col_names.xlsx"
excel_book.convert_to_long(self.hed_schema)
excel_book.to_excel(test_output_name, True)
excel_book.to_excel(test_output_name)
reloaded_df = SpreadsheetInput(test_output_name, worksheet_name="LKT 8HED3", tag_columns=[4],
has_column_names=False)
self.assertTrue(excel_book.dataframe.equals(reloaded_df.dataframe))

excel_book = SpreadsheetInput(self.default_test_file_name, worksheet_name="LKT 8HED3", has_column_names=False,
tag_columns=[4],
column_prefix_dictionary={
1: "Label/",
3: "Description"
})
test_output_name = self.base_output_folder + "ExcelMultipleSheets_resave_assembled_prefix.xlsx"
excel_book.convert_to_long(self.hed_schema)
excel_book.to_excel(test_output_name, True)
reloaded_df = SpreadsheetInput(test_output_name, worksheet_name="LKT 8HED3", tag_columns=[1, 3, 4],
has_column_names=False)

self.assertTrue(excel_book.dataframe_a.equals(reloaded_df.dataframe_a))


if __name__ == '__main__':
unittest.main()