Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
388 changes: 220 additions & 168 deletions ingen/formatters/common_formatters.py

Large diffs are not rendered by default.

44 changes: 31 additions & 13 deletions ingen/formatters/formatter.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
# Copyright (c) 2023 BlackRock, Inc.
# All Rights Reserved.

from ingen.formatters.common_formatters import *
import logging
import time

from ingen.formatters.common_formatters import (
column_filter,
get_formatter_from_type,
name_formatter,
)

log = logging.getLogger()

Expand All @@ -16,26 +23,37 @@ def __init__(self, df, columns, params):
def map_column_id_name(self):
id_name_map = {}
for column in self._columns:
id_name_map[column['src_col_name']] = column['dest_col_name'] if 'dest_col_name' in column else column[
'src_col_name']
id_name_map[column["src_col_name"]] = (
column["dest_col_name"]
if "dest_col_name" in column
else column["src_col_name"]
)
return id_name_map

def apply_format(self):
for column in self._columns:
for formatter in column.get('formatters', []):
formatter_func = get_formatter_from_type(formatter['type'])
col_name = column.get('src_col_name')
for formatter in column.get("formatters", []):
formatter_func = get_formatter_from_type(formatter["type"])
col_name = column.get("src_col_name")
if formatter_func is None:
raise ValueError(f"Invalid formatter type: {formatter.get('type')} "
f"on column {col_name}")
log.info(f"Formatting column {col_name} using {formatter.get('type')} formatter")
raise ValueError(
f"Invalid formatter type: {formatter.get('type')} "
f"on column {col_name}"
)
log.info(
f"Formatting column {col_name} using {formatter.get('type')} formatter"
)
start = time.time()
self._df = formatter_func(self._df, col_name, formatter.get('format'), self._param)
self._df = formatter_func(
self._df, col_name, formatter.get("format"), self._param
)
end = time.time()
log.info(f"Finished '{formatter.get('type')}' formatter on column {col_name} "
f"in {end - start:.2f} seconds")
log.info(
f"Finished '{formatter.get('type')}' formatter on column {col_name} "
f"in {end - start:.2f} seconds"
)

column_names = [col['src_col_name'] for col in self._columns]
column_names = [col["src_col_name"] for col in self._columns]
self._df = column_filter(self._df, column_names)
self._df = name_formatter(self._df, self._id_name_map)
return self._df
1 change: 1 addition & 0 deletions ingen/formatters/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Copyright (c) 2023 BlackRock, Inc.
# All Rights Reserved.


def addition(dataframe, col_name, columns):
if columns is None or len(columns) < 2:
return dataframe
Expand Down
4 changes: 2 additions & 2 deletions ingen/generators/interface_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,11 @@ def validate(self, df, columns, data=None, sources=None):
validation_summaries = []
validated_dataframe = None
if sources is None:
log.info(f"Starting validations on Formatted data")
log.info("Starting validations on Formatted data")
self.validations = Validation(df, columns, data=data)
validated_dataframe, validation_summary = self.validations.apply_validations()
validation_summaries.append(validation_summary)
log.info(f" Finished validations on Formatted data")
log.info(" Finished validations on Formatted data")
else:
for source in sources:
validation_list = source.fetch_validations()
Expand Down
2 changes: 1 addition & 1 deletion ingen/reader/data_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def get_connection(self):
self.__connection = connection.MySQLConnection(host=self.__host, user=self.__user, password=self.__passwd,
database=self.__database) if not self.__connection else self.__connection
return self.__connection
except Error as e:
except Error:
raise RuntimeError("Not able to establish connection with this database.")

def get_cursor(self):
Expand Down
2 changes: 1 addition & 1 deletion ingen/utils/sql_query_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def insert_values(cls, temp_table_config):
file_col = key['file_col']
col_size = f"({key.get('size')})" if 'size' in key else ''
separtaor = '' if key == temp_table_cols[-1] else ','
col_config += col_name + f" " + col_type + col_size + separtaor
col_config += col_name + " " + col_type + col_size + separtaor
col_list += col_name + separtaor
file_cols.append(file_col)
default_val = cls.fill_empty_values(key, col_type)
Expand Down
2 changes: 1 addition & 1 deletion ingen/validation/notification.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,5 +56,5 @@ def email_attributes(params, validation_action, validation_summary):
send_email(validation_action_to_address, email_body, subject)
if "blocker" in str(validation_summary):
raise ValueError(
f"Error while Validating interface file for the columns having severity as blocker"
"Error while Validating interface file for the columns having severity as blocker"
)
8 changes: 4 additions & 4 deletions test/formatters/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def test_addition_function_with_multiple_rows(self):
'weight4': [56, 13, 22]
})
col_name = 'sum'
columns = ['weight1', 'weight2', 'weight3', 'weight4'];
columns = ['weight1', 'weight2', 'weight3', 'weight4']
expected_data = sample_data.copy()
expected_data[col_name] = [106, 90, 121]
formatted_data = addition(sample_data, col_name, columns)
Expand All @@ -43,7 +43,7 @@ def test_subtraction_function_with_multiple_rows(self):
'weight4': [2, 5, 10]
})
col_name = 'sub'
columns = ['weight1', 'weight2', 'weight3', 'weight4'];
columns = ['weight1', 'weight2', 'weight3', 'weight4']
expected_data = sample_data.copy()
expected_data[col_name] = [10, 10, 10]
formatted_data = subtract(sample_data, col_name, columns)
Expand All @@ -69,7 +69,7 @@ def test_divide_function_with_multiple_rows(self):
'weight4': [2, 2, 2]
})
col_name = 'div'
columns = ['weight1', 'weight2', 'weight3', 'weight4'];
columns = ['weight1', 'weight2', 'weight3', 'weight4']
expected_data = sample_data.copy()
expected_data[col_name] = [16.0, 4.0, 8.0]
formatted_data = divide(sample_data, col_name, columns)
Expand All @@ -95,7 +95,7 @@ def test_multiply_function_with_multiple_rows(self):
'weight4': [2, 2, 2]
})
col_name = 'mul'
columns = ['weight1', 'weight2', 'weight3', 'weight4'];
columns = ['weight1', 'weight2', 'weight3', 'weight4']
expected_data = sample_data.copy()
expected_data[col_name] = [128, 32, 64]
formatted_data = multiply(sample_data, col_name, columns)
Expand Down
2 changes: 1 addition & 1 deletion test/metadata/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ def test_path_without_date(self):

output = metadata.output

expected_path = f"/some/path/name.csv"
expected_path = "/some/path/name.csv"
self.assertEqual(expected_path, output.get("props").get("path"))

def test_metadata_validation_action(self):
Expand Down
1 change: 0 additions & 1 deletion test/reader/test_xml_file_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import unittest
from pathlib import Path
from pyexpat import ExpatError
from typing import Dict, Union, List
from unittest.mock import patch

import pandas as pd
Expand Down