From 8d1a21465233a1c2178c062948f10d08cb16ba10 Mon Sep 17 00:00:00 2001 From: Daniel Dubovski Date: Tue, 26 Mar 2019 13:16:20 +0200 Subject: [PATCH 01/13] performace improvent for dataframes from kusto results --- azure-kusto-data/azure/kusto/data/_models.py | 30 +++++++++++++++++++- azure-kusto-data/azure/kusto/data/helpers.py | 24 +++++++++------- 2 files changed, 42 insertions(+), 12 deletions(-) diff --git a/azure-kusto-data/azure/kusto/data/_models.py b/azure-kusto-data/azure/kusto/data/_models.py index 25404e32..8dc12a64 100644 --- a/azure-kusto-data/azure/kusto/data/_models.py +++ b/azure-kusto-data/azure/kusto/data/_models.py @@ -9,6 +9,14 @@ from .exceptions import KustoServiceError +pandas_exist = True + +try: + import pandas +except: + pandas_exist = False + + class WellKnownDataSet(Enum): """Categorizes data tables according to the role they play in the data set that a Kusto query returns.""" @@ -26,6 +34,7 @@ class KustoResultRow(object): def __init__(self, columns, row): self._value_by_name = {} self._value_by_index = [] + self._hidden_values = [] self._seventh_digit = {} for i, value in enumerate(row): column = columns[i] @@ -39,6 +48,8 @@ def __init__(self, columns, row): if lower_column_type in ["datetime", "timespan"]: if value is None: typed_value = None + if pandas_exist: + self._hidden_values.append(None) else: try: # If you are here to read this, you probably hit some datetime/timedelta inconsistencies. @@ -52,7 +63,10 @@ def __init__(self, columns, row): if char.isdigit(): tick = int(char) last = value[-1] if value[-1].isalpha() else "" - typed_value = KustoResultRow.convertion_funcs[lower_column_type](value[:-2] + last) + lookback = 2 if last else 1 + if pandas_exist: + self._hidden_values.append(value[:-lookback] + char + "00" + last) + typed_value = KustoResultRow.convertion_funcs[lower_column_type](value[:-lookback] + last) if tick: if lower_column_type == "datetime": self._seventh_digit[column.column_name] = tick @@ -62,12 +76,21 @@ def __init__(self, columns, row): ) else: typed_value = KustoResultRow.convertion_funcs[lower_column_type](value) + if pandas_exist: + self._hidden_values.append(value) except (IndexError, AttributeError): typed_value = KustoResultRow.convertion_funcs[lower_column_type](value) + if pandas_exist: + self._hidden_values.append(value) + elif lower_column_type in KustoResultRow.convertion_funcs: typed_value = KustoResultRow.convertion_funcs[lower_column_type](value) + if pandas_exist: + self._hidden_values.append(value) else: typed_value = value + if pandas_exist: + self._hidden_values.append(value) self._value_by_index.append(typed_value) self._value_by_name[column.column_name] = typed_value @@ -129,6 +152,11 @@ def __init__(self, json_table): self.rows = [KustoResultRow(self.columns, row) for row in json_table["Rows"]] + @property + def _rows(self): + for row in self.rows: + yield row._hidden_values + @property def rows_count(self): return len(self.rows) diff --git a/azure-kusto-data/azure/kusto/data/helpers.py b/azure-kusto-data/azure/kusto/data/helpers.py index c9138eac..149484a0 100644 --- a/azure-kusto-data/azure/kusto/data/helpers.py +++ b/azure-kusto-data/azure/kusto/data/helpers.py @@ -6,7 +6,7 @@ def dataframe_from_result_table(table): """Converts Kusto tables into pandas DataFrame. - :param azure.kusto.data._models.KustoResultTable table: Table recieved from the response. + :param azure.kusto.data._models.KustoResultTable table: Table received from the response. :return: pandas DataFrame. :rtype: pandas.DataFrame """ @@ -16,16 +16,18 @@ def dataframe_from_result_table(table): if not isinstance(table, KustoResultTable): raise TypeError("Expected KustoResultTable got {}".format(type(table).__name__)) - frame = pandas.DataFrame.from_records( - [row.to_list() for row in table.rows], columns=[col.column_name for col in table.columns] - ) - bool_columns = [col.column_name for col in table.columns if col.column_type == "bool"] - for col in bool_columns: - frame[col] = frame[col].astype(bool) + columns = [col.column_name for col in table.columns] + frame = pandas.DataFrame(table._rows, columns=columns) - for i in range(len(table.rows)): - seventh = table.rows[i]._seventh_digit - for name in seventh.keys(): - frame.loc[i, name] += pandas.Timedelta(seventh[name] * 100, unit="ns") + # fix types + for col in table.columns: + if col.column_type == "bool": + frame[col.column_name] = frame[col.column_name].astype(bool) + elif col.column_type == "datetime": + # as string first because can be None due to previous conversions + frame[col.column_name] = pandas.to_datetime(frame[col.column_name]) + elif col.column_type == "timespan": + # as string first because can be None due to previous conversions + frame[col.column_name] = pandas.to_timedelta(frame[col.column_name]) return frame From 64ff29bf37299db9247f3cd0392dd2662244f15a Mon Sep 17 00:00:00 2001 From: Daniel Dubovski Date: Sun, 31 Mar 2019 17:08:19 +0300 Subject: [PATCH 02/13] fixed timestamp special cases --- azure-kusto-data/azure/kusto/data/_models.py | 93 ++++++++++++++------ azure-kusto-data/azure/kusto/data/helpers.py | 4 +- azure-kusto-data/tests/test_kusto_client.py | 20 ++--- 3 files changed, 78 insertions(+), 39 deletions(-) diff --git a/azure-kusto-data/azure/kusto/data/_models.py b/azure-kusto-data/azure/kusto/data/_models.py index 8dc12a64..ccafdd42 100644 --- a/azure-kusto-data/azure/kusto/data/_models.py +++ b/azure-kusto-data/azure/kusto/data/_models.py @@ -9,13 +9,35 @@ from .exceptions import KustoServiceError -pandas_exist = True +keep_high_precision_values = True try: import pandas except: - pandas_exist = False - + keep_high_precision_values = False + + +def _get_precise_repr(t, raw_value, typed_value, **kwargs): + if t == "datetime": + lookback = kwargs.get('lookback') + seventh_char = kwargs.get('seventh_char') + last = kwargs.get('last') + + if seventh_char and seventh_char.isdigit(): + return raw_value[:-lookback] + seventh_char + "00" + last + else: + return raw_value + elif t == "timespan": + seconds_fractions_part = kwargs.get('seconds_fractions_part') + if seconds_fractions_part: + whole_part = int(typed_value.total_seconds()) + fractions = str(whole_part) + '.' + seconds_fractions_part + total_seconds = float(fractions) + return total_seconds + else: + return typed_value.total_seconds() + else: + raise ValueError("Unknown type {t}".format(t)) class WellKnownDataSet(Enum): """Categorizes data tables according to the role they play in the data set that a Kusto query returns.""" @@ -39,18 +61,25 @@ def __init__(self, columns, row): for i, value in enumerate(row): column = columns[i] try: - lower_column_type = column.column_type.lower() + column_type = column.column_type.lower() except AttributeError: self._value_by_index.append(value) self._value_by_name[columns[i]] = value + if keep_high_precision_values: + self._hidden_values.append(value) continue - if lower_column_type in ["datetime", "timespan"]: + if column_type in ["datetime", "timespan"]: if value is None: typed_value = None - if pandas_exist: + if keep_high_precision_values: self._hidden_values.append(None) else: + seconds_fractions_part = None + seventh_char = None + last = value[-1] if type(value) is str and value[-1].isalpha() else "" + lookback = None + try: # If you are here to read this, you probably hit some datetime/timedelta inconsistencies. # Azure-Data-Explorer(Kusto) supports 7 decimal digits, while the corresponding python types supports only 6. @@ -59,37 +88,47 @@ def __init__(self, columns, row): # this precision in case they want it. One example why one might want this precision, is when # working with pandas. In that case, use azure.kusto.data.helpers.dataframe_from_result_table # which takes into account the 7th digit. - char = value.split(":")[2].split(".")[1][6] - if char.isdigit(): - tick = int(char) - last = value[-1] if value[-1].isalpha() else "" + seconds_part = value.split(":")[2] + seconds_fractions_part = seconds_part.split(".")[1] + seventh_char = seconds_fractions_part[6] + + if seventh_char.isdigit(): + tick = int(seventh_char) + lookback = 2 if last else 1 - if pandas_exist: - self._hidden_values.append(value[:-lookback] + char + "00" + last) - typed_value = KustoResultRow.convertion_funcs[lower_column_type](value[:-lookback] + last) + + typed_value = KustoResultRow.convertion_funcs[column_type](value[:-lookback] + last) + # this is a special case where plain python will lose precision, so we keep the precise value hidden + # when transforming to pandas, we can use the hidden value to covert to precise types if tick: - if lower_column_type == "datetime": + if column_type == "datetime": self._seventh_digit[column.column_name] = tick - else: + elif column_type == "timespan": self._seventh_digit[column.column_name] = ( tick if abs(typed_value) == typed_value else -tick - ) + ) + else: + raise TypeError("Unexpected type {}".format(column_type)) else: - typed_value = KustoResultRow.convertion_funcs[lower_column_type](value) - if pandas_exist: - self._hidden_values.append(value) + typed_value = KustoResultRow.convertion_funcs[column_type](value) + except (IndexError, AttributeError): - typed_value = KustoResultRow.convertion_funcs[lower_column_type](value) - if pandas_exist: - self._hidden_values.append(value) - - elif lower_column_type in KustoResultRow.convertion_funcs: - typed_value = KustoResultRow.convertion_funcs[lower_column_type](value) - if pandas_exist: + typed_value = KustoResultRow.convertion_funcs[column_type](value) + + if keep_high_precision_values: + self._hidden_values.append(_get_precise_repr( + column_type, value, typed_value, + seconds_fractions_part = seconds_fractions_part, + last= last, + lookback = lookback, + seventh_char=seventh_char)) + elif column_type in KustoResultRow.convertion_funcs: + typed_value = KustoResultRow.convertion_funcs[column_type](value) + if keep_high_precision_values: self._hidden_values.append(value) else: typed_value = value - if pandas_exist: + if keep_high_precision_values: self._hidden_values.append(value) self._value_by_index.append(typed_value) diff --git a/azure-kusto-data/azure/kusto/data/helpers.py b/azure-kusto-data/azure/kusto/data/helpers.py index 149484a0..6769bd01 100644 --- a/azure-kusto-data/azure/kusto/data/helpers.py +++ b/azure-kusto-data/azure/kusto/data/helpers.py @@ -25,9 +25,9 @@ def dataframe_from_result_table(table): frame[col.column_name] = frame[col.column_name].astype(bool) elif col.column_type == "datetime": # as string first because can be None due to previous conversions - frame[col.column_name] = pandas.to_datetime(frame[col.column_name]) + frame[col.column_name] = pandas.to_datetime(frame[col.column_name], utc=True) elif col.column_type == "timespan": # as string first because can be None due to previous conversions - frame[col.column_name] = pandas.to_timedelta(frame[col.column_name]) + frame[col.column_name] = pandas.to_timedelta(frame[col.column_name], unit='s') return frame diff --git a/azure-kusto-data/tests/test_kusto_client.py b/azure-kusto-data/tests/test_kusto_client.py index 38cd7639..61d47a95 100644 --- a/azure-kusto-data/tests/test_kusto_client.py +++ b/azure-kusto-data/tests/test_kusto_client.py @@ -241,16 +241,16 @@ def test_sanity_data_frame(self, mock_post): "xdate": Series( [ pandas.to_datetime(None), - pandas.to_datetime("2014-01-01T01:01:01.0000000Z").tz_convert(UTC), - pandas.to_datetime("2015-01-01T01:01:01.0000001Z").tz_convert(UTC), - pandas.to_datetime("2016-01-01T01:01:01.0000002Z").tz_convert(UTC), - pandas.to_datetime("2017-01-01T01:01:01.0000003Z").tz_convert(UTC), - pandas.to_datetime("2018-01-01T01:01:01.0000004Z").tz_convert(UTC), - pandas.to_datetime("2019-01-01T01:01:01.0000005Z").tz_convert(UTC), - pandas.to_datetime("2020-01-01T01:01:01.0000006Z").tz_convert(UTC), - pandas.to_datetime("2021-01-01T01:01:01.0000007Z").tz_convert(UTC), - pandas.to_datetime("2022-01-01T01:01:01.0000008Z").tz_convert(UTC), - pandas.to_datetime("2023-01-01T01:01:01.0000009Z").tz_convert(UTC), + pandas.to_datetime("2014-01-01T01:01:01.0000000Z").tz_convert('UTC'), + pandas.to_datetime("2015-01-01T01:01:01.0000001Z").tz_convert('UTC'), + pandas.to_datetime("2016-01-01T01:01:01.0000002Z").tz_convert('UTC'), + pandas.to_datetime("2017-01-01T01:01:01.0000003Z").tz_convert('UTC'), + pandas.to_datetime("2018-01-01T01:01:01.0000004Z").tz_convert('UTC'), + pandas.to_datetime("2019-01-01T01:01:01.0000005Z").tz_convert('UTC'), + pandas.to_datetime("2020-01-01T01:01:01.0000006Z").tz_convert('UTC'), + pandas.to_datetime("2021-01-01T01:01:01.0000007Z").tz_convert('UTC'), + pandas.to_datetime("2022-01-01T01:01:01.0000008Z").tz_convert('UTC'), + pandas.to_datetime("2023-01-01T01:01:01.0000009Z").tz_convert('UTC'), ] ), "xsmalltext": Series( From 2b6e54137adb9aac34ad9ff6f85ce79390d8bc14 Mon Sep 17 00:00:00 2001 From: Daniel Dubovski Date: Sun, 31 Mar 2019 17:09:02 +0300 Subject: [PATCH 03/13] black --- azure-kusto-data/azure/kusto/data/_models.py | 40 +++++++++++--------- azure-kusto-data/azure/kusto/data/helpers.py | 2 +- azure-kusto-data/tests/test_kusto_client.py | 20 +++++----- 3 files changed, 34 insertions(+), 28 deletions(-) diff --git a/azure-kusto-data/azure/kusto/data/_models.py b/azure-kusto-data/azure/kusto/data/_models.py index ccafdd42..009a16bf 100644 --- a/azure-kusto-data/azure/kusto/data/_models.py +++ b/azure-kusto-data/azure/kusto/data/_models.py @@ -19,19 +19,19 @@ def _get_precise_repr(t, raw_value, typed_value, **kwargs): if t == "datetime": - lookback = kwargs.get('lookback') - seventh_char = kwargs.get('seventh_char') - last = kwargs.get('last') + lookback = kwargs.get("lookback") + seventh_char = kwargs.get("seventh_char") + last = kwargs.get("last") if seventh_char and seventh_char.isdigit(): return raw_value[:-lookback] + seventh_char + "00" + last else: return raw_value elif t == "timespan": - seconds_fractions_part = kwargs.get('seconds_fractions_part') + seconds_fractions_part = kwargs.get("seconds_fractions_part") if seconds_fractions_part: whole_part = int(typed_value.total_seconds()) - fractions = str(whole_part) + '.' + seconds_fractions_part + fractions = str(whole_part) + "." + seconds_fractions_part total_seconds = float(fractions) return total_seconds else: @@ -39,6 +39,7 @@ def _get_precise_repr(t, raw_value, typed_value, **kwargs): else: raise ValueError("Unknown type {t}".format(t)) + class WellKnownDataSet(Enum): """Categorizes data tables according to the role they play in the data set that a Kusto query returns.""" @@ -94,11 +95,11 @@ def __init__(self, columns, row): if seventh_char.isdigit(): tick = int(seventh_char) - + lookback = 2 if last else 1 - + typed_value = KustoResultRow.convertion_funcs[column_type](value[:-lookback] + last) - # this is a special case where plain python will lose precision, so we keep the precise value hidden + # this is a special case where plain python will lose precision, so we keep the precise value hidden # when transforming to pandas, we can use the hidden value to covert to precise types if tick: if column_type == "datetime": @@ -106,22 +107,27 @@ def __init__(self, columns, row): elif column_type == "timespan": self._seventh_digit[column.column_name] = ( tick if abs(typed_value) == typed_value else -tick - ) + ) else: raise TypeError("Unexpected type {}".format(column_type)) else: typed_value = KustoResultRow.convertion_funcs[column_type](value) - + except (IndexError, AttributeError): typed_value = KustoResultRow.convertion_funcs[column_type](value) - + if keep_high_precision_values: - self._hidden_values.append(_get_precise_repr( - column_type, value, typed_value, - seconds_fractions_part = seconds_fractions_part, - last= last, - lookback = lookback, - seventh_char=seventh_char)) + self._hidden_values.append( + _get_precise_repr( + column_type, + value, + typed_value, + seconds_fractions_part=seconds_fractions_part, + last=last, + lookback=lookback, + seventh_char=seventh_char, + ) + ) elif column_type in KustoResultRow.convertion_funcs: typed_value = KustoResultRow.convertion_funcs[column_type](value) if keep_high_precision_values: diff --git a/azure-kusto-data/azure/kusto/data/helpers.py b/azure-kusto-data/azure/kusto/data/helpers.py index 6769bd01..bf934f6f 100644 --- a/azure-kusto-data/azure/kusto/data/helpers.py +++ b/azure-kusto-data/azure/kusto/data/helpers.py @@ -28,6 +28,6 @@ def dataframe_from_result_table(table): frame[col.column_name] = pandas.to_datetime(frame[col.column_name], utc=True) elif col.column_type == "timespan": # as string first because can be None due to previous conversions - frame[col.column_name] = pandas.to_timedelta(frame[col.column_name], unit='s') + frame[col.column_name] = pandas.to_timedelta(frame[col.column_name], unit="s") return frame diff --git a/azure-kusto-data/tests/test_kusto_client.py b/azure-kusto-data/tests/test_kusto_client.py index 61d47a95..7374b257 100644 --- a/azure-kusto-data/tests/test_kusto_client.py +++ b/azure-kusto-data/tests/test_kusto_client.py @@ -241,16 +241,16 @@ def test_sanity_data_frame(self, mock_post): "xdate": Series( [ pandas.to_datetime(None), - pandas.to_datetime("2014-01-01T01:01:01.0000000Z").tz_convert('UTC'), - pandas.to_datetime("2015-01-01T01:01:01.0000001Z").tz_convert('UTC'), - pandas.to_datetime("2016-01-01T01:01:01.0000002Z").tz_convert('UTC'), - pandas.to_datetime("2017-01-01T01:01:01.0000003Z").tz_convert('UTC'), - pandas.to_datetime("2018-01-01T01:01:01.0000004Z").tz_convert('UTC'), - pandas.to_datetime("2019-01-01T01:01:01.0000005Z").tz_convert('UTC'), - pandas.to_datetime("2020-01-01T01:01:01.0000006Z").tz_convert('UTC'), - pandas.to_datetime("2021-01-01T01:01:01.0000007Z").tz_convert('UTC'), - pandas.to_datetime("2022-01-01T01:01:01.0000008Z").tz_convert('UTC'), - pandas.to_datetime("2023-01-01T01:01:01.0000009Z").tz_convert('UTC'), + pandas.to_datetime("2014-01-01T01:01:01.0000000Z").tz_convert("UTC"), + pandas.to_datetime("2015-01-01T01:01:01.0000001Z").tz_convert("UTC"), + pandas.to_datetime("2016-01-01T01:01:01.0000002Z").tz_convert("UTC"), + pandas.to_datetime("2017-01-01T01:01:01.0000003Z").tz_convert("UTC"), + pandas.to_datetime("2018-01-01T01:01:01.0000004Z").tz_convert("UTC"), + pandas.to_datetime("2019-01-01T01:01:01.0000005Z").tz_convert("UTC"), + pandas.to_datetime("2020-01-01T01:01:01.0000006Z").tz_convert("UTC"), + pandas.to_datetime("2021-01-01T01:01:01.0000007Z").tz_convert("UTC"), + pandas.to_datetime("2022-01-01T01:01:01.0000008Z").tz_convert("UTC"), + pandas.to_datetime("2023-01-01T01:01:01.0000009Z").tz_convert("UTC"), ] ), "xsmalltext": Series( From 2e86ed5ee4e44617cb305751704bb1563bba380b Mon Sep 17 00:00:00 2001 From: Daniel Dubovski Date: Sun, 31 Mar 2019 19:17:11 +0300 Subject: [PATCH 04/13] timezone fixes --- azure-kusto-data/azure/kusto/data/helpers.py | 3 ++- azure-kusto-data/tests/test_kusto_client.py | 20 ++++++++++---------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/azure-kusto-data/azure/kusto/data/helpers.py b/azure-kusto-data/azure/kusto/data/helpers.py index bf934f6f..733028ff 100644 --- a/azure-kusto-data/azure/kusto/data/helpers.py +++ b/azure-kusto-data/azure/kusto/data/helpers.py @@ -2,6 +2,7 @@ import pandas from ._models import KustoResultTable +from dateutil.tz import UTC def dataframe_from_result_table(table): @@ -25,7 +26,7 @@ def dataframe_from_result_table(table): frame[col.column_name] = frame[col.column_name].astype(bool) elif col.column_type == "datetime": # as string first because can be None due to previous conversions - frame[col.column_name] = pandas.to_datetime(frame[col.column_name], utc=True) + frame[col.column_name] = pandas.to_datetime(frame[col.column_name], utc=True).dt.tz_convert(UTC) elif col.column_type == "timespan": # as string first because can be None due to previous conversions frame[col.column_name] = pandas.to_timedelta(frame[col.column_name], unit="s") diff --git a/azure-kusto-data/tests/test_kusto_client.py b/azure-kusto-data/tests/test_kusto_client.py index 7374b257..f58dc211 100644 --- a/azure-kusto-data/tests/test_kusto_client.py +++ b/azure-kusto-data/tests/test_kusto_client.py @@ -241,16 +241,16 @@ def test_sanity_data_frame(self, mock_post): "xdate": Series( [ pandas.to_datetime(None), - pandas.to_datetime("2014-01-01T01:01:01.0000000Z").tz_convert("UTC"), - pandas.to_datetime("2015-01-01T01:01:01.0000001Z").tz_convert("UTC"), - pandas.to_datetime("2016-01-01T01:01:01.0000002Z").tz_convert("UTC"), - pandas.to_datetime("2017-01-01T01:01:01.0000003Z").tz_convert("UTC"), - pandas.to_datetime("2018-01-01T01:01:01.0000004Z").tz_convert("UTC"), - pandas.to_datetime("2019-01-01T01:01:01.0000005Z").tz_convert("UTC"), - pandas.to_datetime("2020-01-01T01:01:01.0000006Z").tz_convert("UTC"), - pandas.to_datetime("2021-01-01T01:01:01.0000007Z").tz_convert("UTC"), - pandas.to_datetime("2022-01-01T01:01:01.0000008Z").tz_convert("UTC"), - pandas.to_datetime("2023-01-01T01:01:01.0000009Z").tz_convert("UTC"), + pandas.to_datetime("2014-01-01T01:01:01.0000000Z", utc=True).tz_convert(UTC), + pandas.to_datetime("2015-01-01T01:01:01.0000001Z", utc=True).tz_convert(UTC), + pandas.to_datetime("2016-01-01T01:01:01.0000002Z", utc=True).tz_convert(UTC), + pandas.to_datetime("2017-01-01T01:01:01.0000003Z", utc=True).tz_convert(UTC), + pandas.to_datetime("2018-01-01T01:01:01.0000004Z", utc=True).tz_convert(UTC), + pandas.to_datetime("2019-01-01T01:01:01.0000005Z", utc=True).tz_convert(UTC), + pandas.to_datetime("2020-01-01T01:01:01.0000006Z", utc=True).tz_convert(UTC), + pandas.to_datetime("2021-01-01T01:01:01.0000007Z", utc=True).tz_convert(UTC), + pandas.to_datetime("2022-01-01T01:01:01.0000008Z", utc=True).tz_convert(UTC), + pandas.to_datetime("2023-01-01T01:01:01.0000009Z", utc=True).tz_convert(UTC), ] ), "xsmalltext": Series( From 957aff513f7eaa94fba505683039513ecb976577 Mon Sep 17 00:00:00 2001 From: Daniel Dubovski Date: Sun, 31 Mar 2019 19:35:14 +0300 Subject: [PATCH 05/13] no message --- azure-kusto-data/tests/test_kusto_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure-kusto-data/tests/test_kusto_client.py b/azure-kusto-data/tests/test_kusto_client.py index f58dc211..db641801 100644 --- a/azure-kusto-data/tests/test_kusto_client.py +++ b/azure-kusto-data/tests/test_kusto_client.py @@ -126,7 +126,7 @@ def test_sanity_query(self, mock_post): self.assertEqual(row["xuint16"], expected["xuint16"]) self.assertEqual(row["xuint32"], expected["xuint32"]) self.assertEqual(row["xuint64"], expected["xuint64"]) - self.assertEqual(row["xdate"], expected["xdate"]) + self.assertEqual(row["xdate"], expected["xdate"], '{} not equal to {}'.format(row["xdate"], expected["xdate"])) self.assertEqual(row["xsmalltext"], expected["xsmalltext"]) self.assertEqual(row["xtext"], expected["xtext"]) self.assertEqual(row["xnumberAsText"], expected["xnumberAsText"]) From f05e9c0f61356611b5a058ed52d9b45d07025431 Mon Sep 17 00:00:00 2001 From: Daniel Dubovski Date: Mon, 1 Apr 2019 10:12:33 +0300 Subject: [PATCH 06/13] final fixes for missing timezones --- .../azure/kusto/data/_converters.py | 11 ++++++++-- azure-kusto-data/azure/kusto/data/_models.py | 22 +++++++++---------- azure-kusto-data/tests/test_kusto_client.py | 2 +- 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/azure-kusto-data/azure/kusto/data/_converters.py b/azure-kusto-data/azure/kusto/data/_converters.py index 5dea612a..2d8898c1 100644 --- a/azure-kusto-data/azure/kusto/data/_converters.py +++ b/azure-kusto-data/azure/kusto/data/_converters.py @@ -3,6 +3,7 @@ from datetime import timedelta import re from dateutil import parser +from dateutil.tz import UTC import six # Regex for TimeSpan @@ -15,8 +16,14 @@ def to_datetime(value): return None if isinstance(value, six.integer_types): - return parser.parse(value) - return parser.isoparse(value) + parsed = parser.parse(value) + + parsed = parser.isoparse(value) + + if parsed.tzinfo is None: + parsed = parsed.replace(tzinfo=UTC) + + return parsed def to_timedelta(value): diff --git a/azure-kusto-data/azure/kusto/data/_models.py b/azure-kusto-data/azure/kusto/data/_models.py index 009a16bf..1c5143de 100644 --- a/azure-kusto-data/azure/kusto/data/_models.py +++ b/azure-kusto-data/azure/kusto/data/_models.py @@ -25,17 +25,18 @@ def _get_precise_repr(t, raw_value, typed_value, **kwargs): if seventh_char and seventh_char.isdigit(): return raw_value[:-lookback] + seventh_char + "00" + last - else: - return raw_value + + return raw_value elif t == "timespan": seconds_fractions_part = kwargs.get("seconds_fractions_part") if seconds_fractions_part: whole_part = int(typed_value.total_seconds()) fractions = str(whole_part) + "." + seconds_fractions_part total_seconds = float(fractions) + return total_seconds - else: - return typed_value.total_seconds() + + return typed_value.total_seconds() else: raise ValueError("Unknown type {t}".format(t)) @@ -75,10 +76,11 @@ def __init__(self, columns, row): typed_value = None if keep_high_precision_values: self._hidden_values.append(None) + else: seconds_fractions_part = None seventh_char = None - last = value[-1] if type(value) is str and value[-1].isalpha() else "" + last = value[-1] if isinstance(value, six.string_types) and value[-1].isalpha() else "" lookback = None try: @@ -95,12 +97,9 @@ def __init__(self, columns, row): if seventh_char.isdigit(): tick = int(seventh_char) - - lookback = 2 if last else 1 - + lookback = 2 if last else 1 typed_value = KustoResultRow.convertion_funcs[column_type](value[:-lookback] + last) - # this is a special case where plain python will lose precision, so we keep the precise value hidden - # when transforming to pandas, we can use the hidden value to covert to precise types + if tick: if column_type == "datetime": self._seventh_digit[column.column_name] = tick @@ -112,10 +111,11 @@ def __init__(self, columns, row): raise TypeError("Unexpected type {}".format(column_type)) else: typed_value = KustoResultRow.convertion_funcs[column_type](value) - except (IndexError, AttributeError): typed_value = KustoResultRow.convertion_funcs[column_type](value) + # this is a special case where plain python will lose precision, so we keep the precise value hidden + # when transforming to pandas, we can use the hidden value to convert to precise pandas/numpy types if keep_high_precision_values: self._hidden_values.append( _get_precise_repr( diff --git a/azure-kusto-data/tests/test_kusto_client.py b/azure-kusto-data/tests/test_kusto_client.py index db641801..0017529e 100644 --- a/azure-kusto-data/tests/test_kusto_client.py +++ b/azure-kusto-data/tests/test_kusto_client.py @@ -126,7 +126,7 @@ def test_sanity_query(self, mock_post): self.assertEqual(row["xuint16"], expected["xuint16"]) self.assertEqual(row["xuint32"], expected["xuint32"]) self.assertEqual(row["xuint64"], expected["xuint64"]) - self.assertEqual(row["xdate"], expected["xdate"], '{} not equal to {}'.format(row["xdate"], expected["xdate"])) + self.assertEqual(row["xdate"], expected["xdate"])) self.assertEqual(row["xsmalltext"], expected["xsmalltext"]) self.assertEqual(row["xtext"], expected["xtext"]) self.assertEqual(row["xnumberAsText"], expected["xnumberAsText"]) From c0c2c48d91758f1c9a97db17436fd3478958fdb1 Mon Sep 17 00:00:00 2001 From: Daniel Dubovski Date: Mon, 1 Apr 2019 10:47:09 +0300 Subject: [PATCH 07/13] typo --- azure-kusto-data/azure/kusto/data/_models.py | 2 +- azure-kusto-data/tests/test_kusto_client.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/azure-kusto-data/azure/kusto/data/_models.py b/azure-kusto-data/azure/kusto/data/_models.py index 1c5143de..0cbddaca 100644 --- a/azure-kusto-data/azure/kusto/data/_models.py +++ b/azure-kusto-data/azure/kusto/data/_models.py @@ -97,7 +97,7 @@ def __init__(self, columns, row): if seventh_char.isdigit(): tick = int(seventh_char) - lookback = 2 if last else 1 + lookback = 2 if last else 1 typed_value = KustoResultRow.convertion_funcs[column_type](value[:-lookback] + last) if tick: diff --git a/azure-kusto-data/tests/test_kusto_client.py b/azure-kusto-data/tests/test_kusto_client.py index 0017529e..f58dc211 100644 --- a/azure-kusto-data/tests/test_kusto_client.py +++ b/azure-kusto-data/tests/test_kusto_client.py @@ -126,7 +126,7 @@ def test_sanity_query(self, mock_post): self.assertEqual(row["xuint16"], expected["xuint16"]) self.assertEqual(row["xuint32"], expected["xuint32"]) self.assertEqual(row["xuint64"], expected["xuint64"]) - self.assertEqual(row["xdate"], expected["xdate"])) + self.assertEqual(row["xdate"], expected["xdate"]) self.assertEqual(row["xsmalltext"], expected["xsmalltext"]) self.assertEqual(row["xtext"], expected["xtext"]) self.assertEqual(row["xnumberAsText"], expected["xnumberAsText"]) From 902b318fb02615594f8cdd005391492ef3376595 Mon Sep 17 00:00:00 2001 From: Daniel Dubovski Date: Sun, 14 Apr 2019 16:01:03 +0300 Subject: [PATCH 08/13] updated code (less cumbersome) --- .../azure/kusto/data/_converters.py | 34 ++++++-- azure-kusto-data/azure/kusto/data/_models.py | 81 ++----------------- azure-kusto-data/azure/kusto/data/helpers.py | 8 +- azure-kusto-data/tests/test_functional.py | 16 ++-- azure-kusto-data/tests/test_kusto_client.py | 32 +++++--- 5 files changed, 66 insertions(+), 105 deletions(-) diff --git a/azure-kusto-data/azure/kusto/data/_converters.py b/azure-kusto-data/azure/kusto/data/_converters.py index 2d8898c1..0ecc7b62 100644 --- a/azure-kusto-data/azure/kusto/data/_converters.py +++ b/azure-kusto-data/azure/kusto/data/_converters.py @@ -10,6 +10,32 @@ _TIMESPAN_PATTERN = re.compile(r"(-?)((?P[0-9]*).)?(?P[0-9]{2}):(?P[0-9]{2}):(?P[0-9]{2}(\.[0-9]+)?$)") +def to_high_precision_type(kusto_type, raw_value, typed_value): + import pandas as pd + if kusto_type == "datetime": + return pd.to_datetime(raw_value) + + if kusto_type == "timespan": + if isinstance(raw_value, (six.integer_types, float)): + # https://docs.microsoft.com/en-us/dotnet/api/system.datetime.ticks + # kusto saves up to ticks, 1 tick == 100 nanoseconds + return pd.Timedelta(raw_value * 100, unit='ns') + if isinstance(raw_value, six.string_types): + time_parts = raw_value.split('.') + if len(time_parts) == 3: + seconds_fractions_part = time_parts[-1] + whole_part = int(typed_value.total_seconds()) + fractions = str(whole_part) + "." + seconds_fractions_part + total_seconds = float(fractions) + + return pd.Timedelta(total_seconds, unit='s') + else: + return pd.Timedelta(typed_value) + + return typed_value.total_seconds() + + raise ValueError("Unknown type {t}".format(kusto_type)) + def to_datetime(value): """Converts a string to a datetime.""" if value is None: @@ -18,13 +44,7 @@ def to_datetime(value): if isinstance(value, six.integer_types): parsed = parser.parse(value) - parsed = parser.isoparse(value) - - if parsed.tzinfo is None: - parsed = parsed.replace(tzinfo=UTC) - - return parsed - + return parser.isoparse(value) def to_timedelta(value): """Converts a string to a timedelta.""" diff --git a/azure-kusto-data/azure/kusto/data/_models.py b/azure-kusto-data/azure/kusto/data/_models.py index 0cbddaca..da91d026 100644 --- a/azure-kusto-data/azure/kusto/data/_models.py +++ b/azure-kusto-data/azure/kusto/data/_models.py @@ -17,30 +17,6 @@ keep_high_precision_values = False -def _get_precise_repr(t, raw_value, typed_value, **kwargs): - if t == "datetime": - lookback = kwargs.get("lookback") - seventh_char = kwargs.get("seventh_char") - last = kwargs.get("last") - - if seventh_char and seventh_char.isdigit(): - return raw_value[:-lookback] + seventh_char + "00" + last - - return raw_value - elif t == "timespan": - seconds_fractions_part = kwargs.get("seconds_fractions_part") - if seconds_fractions_part: - whole_part = int(typed_value.total_seconds()) - fractions = str(whole_part) + "." + seconds_fractions_part - total_seconds = float(fractions) - - return total_seconds - - return typed_value.total_seconds() - else: - raise ValueError("Unknown type {t}".format(t)) - - class WellKnownDataSet(Enum): """Categorizes data tables according to the role they play in the data set that a Kusto query returns.""" @@ -59,7 +35,7 @@ def __init__(self, columns, row): self._value_by_name = {} self._value_by_index = [] self._hidden_values = [] - self._seventh_digit = {} + for i, value in enumerate(row): column = columns[i] try: @@ -76,58 +52,17 @@ def __init__(self, columns, row): typed_value = None if keep_high_precision_values: self._hidden_values.append(None) - - else: - seconds_fractions_part = None - seventh_char = None - last = value[-1] if isinstance(value, six.string_types) and value[-1].isalpha() else "" - lookback = None - - try: - # If you are here to read this, you probably hit some datetime/timedelta inconsistencies. - # Azure-Data-Explorer(Kusto) supports 7 decimal digits, while the corresponding python types supports only 6. - # What we do here, is remove the 7th digit, if exists, and create a datetime/timedelta - # from whats left. The reason we are keeping the 7th digit, is to allow users to work with - # this precision in case they want it. One example why one might want this precision, is when - # working with pandas. In that case, use azure.kusto.data.helpers.dataframe_from_result_table - # which takes into account the 7th digit. - seconds_part = value.split(":")[2] - seconds_fractions_part = seconds_part.split(".")[1] - seventh_char = seconds_fractions_part[6] - - if seventh_char.isdigit(): - tick = int(seventh_char) - lookback = 2 if last else 1 - typed_value = KustoResultRow.convertion_funcs[column_type](value[:-lookback] + last) - - if tick: - if column_type == "datetime": - self._seventh_digit[column.column_name] = tick - elif column_type == "timespan": - self._seventh_digit[column.column_name] = ( - tick if abs(typed_value) == typed_value else -tick - ) - else: - raise TypeError("Unexpected type {}".format(column_type)) - else: - typed_value = KustoResultRow.convertion_funcs[column_type](value) - except (IndexError, AttributeError): - typed_value = KustoResultRow.convertion_funcs[column_type](value) + else: + # If you are here to read this, you probably hit some datetime/timedelta inconsistencies. + # Azure-Data-Explorer(Kusto) supports 7 decimal digits, while the corresponding python types supports only 6. + # One example why one might want this precision, is when working with pandas. + # In that case, use azure.kusto.data.helpers.dataframe_from_result_table which takes into account the original value. + typed_value = KustoResultRow.convertion_funcs[column_type](value) # this is a special case where plain python will lose precision, so we keep the precise value hidden # when transforming to pandas, we can use the hidden value to convert to precise pandas/numpy types if keep_high_precision_values: - self._hidden_values.append( - _get_precise_repr( - column_type, - value, - typed_value, - seconds_fractions_part=seconds_fractions_part, - last=last, - lookback=lookback, - seventh_char=seventh_char, - ) - ) + self._hidden_values.append(_converters.to_high_precision_type(column_type,value, typed_value)) elif column_type in KustoResultRow.convertion_funcs: typed_value = KustoResultRow.convertion_funcs[column_type](value) if keep_high_precision_values: diff --git a/azure-kusto-data/azure/kusto/data/helpers.py b/azure-kusto-data/azure/kusto/data/helpers.py index 733028ff..840b22ec 100644 --- a/azure-kusto-data/azure/kusto/data/helpers.py +++ b/azure-kusto-data/azure/kusto/data/helpers.py @@ -23,12 +23,6 @@ def dataframe_from_result_table(table): # fix types for col in table.columns: if col.column_type == "bool": - frame[col.column_name] = frame[col.column_name].astype(bool) - elif col.column_type == "datetime": - # as string first because can be None due to previous conversions - frame[col.column_name] = pandas.to_datetime(frame[col.column_name], utc=True).dt.tz_convert(UTC) - elif col.column_type == "timespan": - # as string first because can be None due to previous conversions - frame[col.column_name] = pandas.to_timedelta(frame[col.column_name], unit="s") + frame[col.column_name] = frame[col.column_name].astype(bool) return frame diff --git a/azure-kusto-data/tests/test_functional.py b/azure-kusto-data/tests/test_functional.py index 78fadd4f..840da175 100644 --- a/azure-kusto-data/tests/test_functional.py +++ b/azure-kusto-data/tests/test_functional.py @@ -213,11 +213,10 @@ def test_valid_response(self): self.assertEqual(type(row[4]), bool if row[4] is not None else type(None)) self.assertEqual(type(row[5]), timedelta if row[5] is not None else type(None)) - for i in range(0, len(primary_table)): - row = primary_table[i] - expected_row = expected_table[i] - for j in range(0, len(row)): - self.assertEqual(row[j], expected_row[j]) + for row_index, row in enumerate(primary_table): + expected_row = expected_table[row_index] + for col_index, value in enumerate(row): + self.assertEqual(value, expected_row[col_index]) def test_invalid_table(self): """Tests calling of table with index that doesn't exists.""" @@ -236,3 +235,10 @@ def test_iterating_after_end(self): """Tests StopIteration is raised when the response ends.""" response = KustoResponseDataSetV2(json.loads(RESPONSE_TEXT)) self.assertEqual(sum(1 for _ in response.primary_results[0]), 3) + + def test_row_equality(self): + """Tests the rows are idempotent.""" + response = KustoResponseDataSetV2(json.loads(RESPONSE_TEXT)) + table = response.primary_results[0] + for row_index, row in enumerate(table): + self.assertEqual(table[row_index], row) diff --git a/azure-kusto-data/tests/test_kusto_client.py b/azure-kusto-data/tests/test_kusto_client.py index f58dc211..a7b52303 100644 --- a/azure-kusto-data/tests/test_kusto_client.py +++ b/azure-kusto-data/tests/test_kusto_client.py @@ -171,12 +171,18 @@ def test_sanity_query(self, mock_post): expected["xsmalltext"] = DIGIT_WORDS[int(expected["xint16"])] expected["xtext"] = DIGIT_WORDS[int(expected["xint16"])] expected["xnumberAsText"] = text_type(expected["xint16"]) - expected["xtime"] = ( + + next_time = ( timedelta() if expected["xtime"] is None - else (abs(expected["xtime"]) + timedelta(days=1, seconds=1, microseconds=1000)) - * (-1) ** (expected["rownumber"] + 1) + else (abs(expected["xtime"]) + timedelta(days=1, seconds=1, microseconds=1000)) * (-1) ** (expected["rownumber"] + 1) ) + + # hacky tests - because time here is relative to previous row, after we pass a time where we have > 500 nanoseconds, + # another microseconds digit is needed + if expected["rownumber"] + 1 == 6: + next_time += timedelta(microseconds=1) + expected["xtime"] = next_time if expected["xint16"] > 0: expected["xdynamicWithNulls"] = {"rowId": expected["xint16"], "arr": [0, expected["xint16"]]} @@ -241,16 +247,16 @@ def test_sanity_data_frame(self, mock_post): "xdate": Series( [ pandas.to_datetime(None), - pandas.to_datetime("2014-01-01T01:01:01.0000000Z", utc=True).tz_convert(UTC), - pandas.to_datetime("2015-01-01T01:01:01.0000001Z", utc=True).tz_convert(UTC), - pandas.to_datetime("2016-01-01T01:01:01.0000002Z", utc=True).tz_convert(UTC), - pandas.to_datetime("2017-01-01T01:01:01.0000003Z", utc=True).tz_convert(UTC), - pandas.to_datetime("2018-01-01T01:01:01.0000004Z", utc=True).tz_convert(UTC), - pandas.to_datetime("2019-01-01T01:01:01.0000005Z", utc=True).tz_convert(UTC), - pandas.to_datetime("2020-01-01T01:01:01.0000006Z", utc=True).tz_convert(UTC), - pandas.to_datetime("2021-01-01T01:01:01.0000007Z", utc=True).tz_convert(UTC), - pandas.to_datetime("2022-01-01T01:01:01.0000008Z", utc=True).tz_convert(UTC), - pandas.to_datetime("2023-01-01T01:01:01.0000009Z", utc=True).tz_convert(UTC), + pandas.to_datetime("2014-01-01T01:01:01.0000000Z"), + pandas.to_datetime("2015-01-01T01:01:01.0000001Z"), + pandas.to_datetime("2016-01-01T01:01:01.0000002Z"), + pandas.to_datetime("2017-01-01T01:01:01.0000003Z"), + pandas.to_datetime("2018-01-01T01:01:01.0000004Z"), + pandas.to_datetime("2019-01-01T01:01:01.0000005Z"), + pandas.to_datetime("2020-01-01T01:01:01.0000006Z"), + pandas.to_datetime("2021-01-01T01:01:01.0000007Z"), + pandas.to_datetime("2022-01-01T01:01:01.0000008Z"), + pandas.to_datetime("2023-01-01T01:01:01.0000009Z"), ] ), "xsmalltext": Series( From a0a71111602664d74822199ce0ccefb13d352edb Mon Sep 17 00:00:00 2001 From: Daniel Dubovski Date: Sun, 14 Apr 2019 16:04:56 +0300 Subject: [PATCH 09/13] black fixes --- .../azure/kusto/data/_converters.py | 21 +++++++++++-------- azure-kusto-data/azure/kusto/data/_models.py | 8 +++---- azure-kusto-data/azure/kusto/data/helpers.py | 2 +- azure-kusto-data/tests/test_functional.py | 2 +- azure-kusto-data/tests/test_kusto_client.py | 11 +++++----- 5 files changed, 24 insertions(+), 20 deletions(-) diff --git a/azure-kusto-data/azure/kusto/data/_converters.py b/azure-kusto-data/azure/kusto/data/_converters.py index 0ecc7b62..17568e36 100644 --- a/azure-kusto-data/azure/kusto/data/_converters.py +++ b/azure-kusto-data/azure/kusto/data/_converters.py @@ -12,30 +12,32 @@ def to_high_precision_type(kusto_type, raw_value, typed_value): import pandas as pd - if kusto_type == "datetime": + + if kusto_type == "datetime": return pd.to_datetime(raw_value) - + if kusto_type == "timespan": if isinstance(raw_value, (six.integer_types, float)): # https://docs.microsoft.com/en-us/dotnet/api/system.datetime.ticks - # kusto saves up to ticks, 1 tick == 100 nanoseconds - return pd.Timedelta(raw_value * 100, unit='ns') + # kusto saves up to ticks, 1 tick == 100 nanoseconds + return pd.Timedelta(raw_value * 100, unit="ns") if isinstance(raw_value, six.string_types): - time_parts = raw_value.split('.') + time_parts = raw_value.split(".") if len(time_parts) == 3: seconds_fractions_part = time_parts[-1] whole_part = int(typed_value.total_seconds()) fractions = str(whole_part) + "." + seconds_fractions_part total_seconds = float(fractions) - - return pd.Timedelta(total_seconds, unit='s') + + return pd.Timedelta(total_seconds, unit="s") else: return pd.Timedelta(typed_value) return typed_value.total_seconds() - + raise ValueError("Unknown type {t}".format(kusto_type)) + def to_datetime(value): """Converts a string to a datetime.""" if value is None: @@ -44,7 +46,8 @@ def to_datetime(value): if isinstance(value, six.integer_types): parsed = parser.parse(value) - return parser.isoparse(value) + return parser.isoparse(value) + def to_timedelta(value): """Converts a string to a timedelta.""" diff --git a/azure-kusto-data/azure/kusto/data/_models.py b/azure-kusto-data/azure/kusto/data/_models.py index da91d026..2e3fa820 100644 --- a/azure-kusto-data/azure/kusto/data/_models.py +++ b/azure-kusto-data/azure/kusto/data/_models.py @@ -52,17 +52,17 @@ def __init__(self, columns, row): typed_value = None if keep_high_precision_values: self._hidden_values.append(None) - else: + else: # If you are here to read this, you probably hit some datetime/timedelta inconsistencies. # Azure-Data-Explorer(Kusto) supports 7 decimal digits, while the corresponding python types supports only 6. - # One example why one might want this precision, is when working with pandas. - # In that case, use azure.kusto.data.helpers.dataframe_from_result_table which takes into account the original value. + # One example why one might want this precision, is when working with pandas. + # In that case, use azure.kusto.data.helpers.dataframe_from_result_table which takes into account the original value. typed_value = KustoResultRow.convertion_funcs[column_type](value) # this is a special case where plain python will lose precision, so we keep the precise value hidden # when transforming to pandas, we can use the hidden value to convert to precise pandas/numpy types if keep_high_precision_values: - self._hidden_values.append(_converters.to_high_precision_type(column_type,value, typed_value)) + self._hidden_values.append(_converters.to_high_precision_type(column_type, value, typed_value)) elif column_type in KustoResultRow.convertion_funcs: typed_value = KustoResultRow.convertion_funcs[column_type](value) if keep_high_precision_values: diff --git a/azure-kusto-data/azure/kusto/data/helpers.py b/azure-kusto-data/azure/kusto/data/helpers.py index 840b22ec..78c0ae3a 100644 --- a/azure-kusto-data/azure/kusto/data/helpers.py +++ b/azure-kusto-data/azure/kusto/data/helpers.py @@ -23,6 +23,6 @@ def dataframe_from_result_table(table): # fix types for col in table.columns: if col.column_type == "bool": - frame[col.column_name] = frame[col.column_name].astype(bool) + frame[col.column_name] = frame[col.column_name].astype(bool) return frame diff --git a/azure-kusto-data/tests/test_functional.py b/azure-kusto-data/tests/test_functional.py index 840da175..48fc21e4 100644 --- a/azure-kusto-data/tests/test_functional.py +++ b/azure-kusto-data/tests/test_functional.py @@ -213,7 +213,7 @@ def test_valid_response(self): self.assertEqual(type(row[4]), bool if row[4] is not None else type(None)) self.assertEqual(type(row[5]), timedelta if row[5] is not None else type(None)) - for row_index, row in enumerate(primary_table): + for row_index, row in enumerate(primary_table): expected_row = expected_table[row_index] for col_index, value in enumerate(row): self.assertEqual(value, expected_row[col_index]) diff --git a/azure-kusto-data/tests/test_kusto_client.py b/azure-kusto-data/tests/test_kusto_client.py index a7b52303..c5b0ab62 100644 --- a/azure-kusto-data/tests/test_kusto_client.py +++ b/azure-kusto-data/tests/test_kusto_client.py @@ -171,18 +171,19 @@ def test_sanity_query(self, mock_post): expected["xsmalltext"] = DIGIT_WORDS[int(expected["xint16"])] expected["xtext"] = DIGIT_WORDS[int(expected["xint16"])] expected["xnumberAsText"] = text_type(expected["xint16"]) - + next_time = ( timedelta() if expected["xtime"] is None - else (abs(expected["xtime"]) + timedelta(days=1, seconds=1, microseconds=1000)) * (-1) ** (expected["rownumber"] + 1) + else (abs(expected["xtime"]) + timedelta(days=1, seconds=1, microseconds=1000)) + * (-1) ** (expected["rownumber"] + 1) ) - # hacky tests - because time here is relative to previous row, after we pass a time where we have > 500 nanoseconds, + # hacky tests - because time here is relative to previous row, after we pass a time where we have > 500 nanoseconds, # another microseconds digit is needed if expected["rownumber"] + 1 == 6: - next_time += timedelta(microseconds=1) - expected["xtime"] = next_time + next_time += timedelta(microseconds=1) + expected["xtime"] = next_time if expected["xint16"] > 0: expected["xdynamicWithNulls"] = {"rowId": expected["xint16"], "arr": [0, expected["xint16"]]} From 5c5c91da12b7b975334bb49286f10ca82b8d7fc1 Mon Sep 17 00:00:00 2001 From: Daniel Dubovski Date: Sun, 14 Apr 2019 16:25:40 +0300 Subject: [PATCH 10/13] update dateutil - fix for more than 6 digit precision --- azure-kusto-data/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure-kusto-data/setup.py b/azure-kusto-data/setup.py index 36d7ad20..fcbdb6cd 100644 --- a/azure-kusto-data/setup.py +++ b/azure-kusto-data/setup.py @@ -44,6 +44,6 @@ namespace_packages=["azure"], keywords="kusto wrapper client library", packages=find_packages(exclude=["azure", "tests"]), - install_requires=["adal>=1.0.0", "python-dateutil>=2.7.0", "requests>=2.13.0", "six>=1.10.0"], + install_requires=["adal>=1.0.0", "python-dateutil>=2.8.0", "requests>=2.13.0", "six>=1.10.0"], extras_require={"pandas": ["pandas==0.24.1"], ":python_version<'3.0'": ["azure-nspkg"]}, ) From 4b43584b21a21c26bd13e9f7b09d1c15e2756ecb Mon Sep 17 00:00:00 2001 From: Daniel Dubovski Date: Sun, 14 Apr 2019 18:12:45 +0300 Subject: [PATCH 11/13] pr fixes --- .../azure/kusto/data/_converters.py | 30 +--------------- azure-kusto-data/azure/kusto/data/_models.py | 20 ++++++----- azure-kusto-data/azure/kusto/data/helpers.py | 36 ++++++++++++++++--- 3 files changed, 45 insertions(+), 41 deletions(-) diff --git a/azure-kusto-data/azure/kusto/data/_converters.py b/azure-kusto-data/azure/kusto/data/_converters.py index 17568e36..a5d8f504 100644 --- a/azure-kusto-data/azure/kusto/data/_converters.py +++ b/azure-kusto-data/azure/kusto/data/_converters.py @@ -10,41 +10,13 @@ _TIMESPAN_PATTERN = re.compile(r"(-?)((?P[0-9]*).)?(?P[0-9]{2}):(?P[0-9]{2}):(?P[0-9]{2}(\.[0-9]+)?$)") -def to_high_precision_type(kusto_type, raw_value, typed_value): - import pandas as pd - - if kusto_type == "datetime": - return pd.to_datetime(raw_value) - - if kusto_type == "timespan": - if isinstance(raw_value, (six.integer_types, float)): - # https://docs.microsoft.com/en-us/dotnet/api/system.datetime.ticks - # kusto saves up to ticks, 1 tick == 100 nanoseconds - return pd.Timedelta(raw_value * 100, unit="ns") - if isinstance(raw_value, six.string_types): - time_parts = raw_value.split(".") - if len(time_parts) == 3: - seconds_fractions_part = time_parts[-1] - whole_part = int(typed_value.total_seconds()) - fractions = str(whole_part) + "." + seconds_fractions_part - total_seconds = float(fractions) - - return pd.Timedelta(total_seconds, unit="s") - else: - return pd.Timedelta(typed_value) - - return typed_value.total_seconds() - - raise ValueError("Unknown type {t}".format(kusto_type)) - - def to_datetime(value): """Converts a string to a datetime.""" if value is None: return None if isinstance(value, six.integer_types): - parsed = parser.parse(value) + return parser.parse(value) return parser.isoparse(value) diff --git a/azure-kusto-data/azure/kusto/data/_models.py b/azure-kusto-data/azure/kusto/data/_models.py index 2e3fa820..9a65ba8a 100644 --- a/azure-kusto-data/azure/kusto/data/_models.py +++ b/azure-kusto-data/azure/kusto/data/_models.py @@ -9,12 +9,13 @@ from .exceptions import KustoServiceError -keep_high_precision_values = True +has_pandas = True try: import pandas + from .helpers import to_pandas_datetime, to_pandas_timedelta except: - keep_high_precision_values = False + has_pandas = False class WellKnownDataSet(Enum): @@ -43,14 +44,14 @@ def __init__(self, columns, row): except AttributeError: self._value_by_index.append(value) self._value_by_name[columns[i]] = value - if keep_high_precision_values: + if has_pandas: self._hidden_values.append(value) continue if column_type in ["datetime", "timespan"]: if value is None: typed_value = None - if keep_high_precision_values: + if has_pandas: self._hidden_values.append(None) else: # If you are here to read this, you probably hit some datetime/timedelta inconsistencies. @@ -61,15 +62,18 @@ def __init__(self, columns, row): # this is a special case where plain python will lose precision, so we keep the precise value hidden # when transforming to pandas, we can use the hidden value to convert to precise pandas/numpy types - if keep_high_precision_values: - self._hidden_values.append(_converters.to_high_precision_type(column_type, value, typed_value)) + if has_pandas: + if column_type == "datetime": + self._hidden_values.append(to_pandas_datetime(value)) + if column_type == "timespan": + self._hidden_values.append(to_pandas_timedelta(value, typed_value)) elif column_type in KustoResultRow.convertion_funcs: typed_value = KustoResultRow.convertion_funcs[column_type](value) - if keep_high_precision_values: + if has_pandas: self._hidden_values.append(value) else: typed_value = value - if keep_high_precision_values: + if has_pandas: self._hidden_values.append(value) self._value_by_index.append(typed_value) diff --git a/azure-kusto-data/azure/kusto/data/helpers.py b/azure-kusto-data/azure/kusto/data/helpers.py index 78c0ae3a..71827271 100644 --- a/azure-kusto-data/azure/kusto/data/helpers.py +++ b/azure-kusto-data/azure/kusto/data/helpers.py @@ -1,11 +1,39 @@ """Kusto helper functions""" +import six -import pandas -from ._models import KustoResultTable -from dateutil.tz import UTC + +def to_pandas_datetime(raw_value): + import pandas as pd + + return pd.to_datetime(raw_value) + + +def to_pandas_timedelta(raw_value, timedelta_value): + import pandas as pd + + if isinstance(raw_value, (six.integer_types, float)): + # https://docs.microsoft.com/en-us/dotnet/api/system.datetime.ticks + # kusto saves up to ticks, 1 tick == 100 nanoseconds + return pd.Timedelta(raw_value * 100, unit="ns") + if isinstance(raw_value, six.string_types): + fraction = raw_value.split(".")[-1] + if fraction.isdigit(): + whole_part = int(timedelta_value.total_seconds()) + time_with_exact_fraction = str(whole_part) + "." + fraction + total_seconds = float(time_with_exact_fraction) + + return pd.Timedelta(total_seconds, unit="s") + else: + return pd.Timedelta(timedelta_value) + + return pd.Timedelta(timedelta_value.total_seconds(), unit="ns") def dataframe_from_result_table(table): + import pandas as pd + from ._models import KustoResultTable + from dateutil.tz import UTC + """Converts Kusto tables into pandas DataFrame. :param azure.kusto.data._models.KustoResultTable table: Table received from the response. :return: pandas DataFrame. @@ -18,7 +46,7 @@ def dataframe_from_result_table(table): raise TypeError("Expected KustoResultTable got {}".format(type(table).__name__)) columns = [col.column_name for col in table.columns] - frame = pandas.DataFrame(table._rows, columns=columns) + frame = pd.DataFrame(table._rows, columns=columns) # fix types for col in table.columns: From bc43aec57f3844a1abeaa372763b8f148231170c Mon Sep 17 00:00:00 2001 From: Daniel Dubovski Date: Mon, 15 Apr 2019 10:44:11 +0300 Subject: [PATCH 12/13] minor PR fixes --- azure-kusto-data/azure/kusto/data/_converters.py | 1 - azure-kusto-data/azure/kusto/data/_models.py | 16 ++++++++-------- azure-kusto-data/azure/kusto/data/helpers.py | 2 +- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/azure-kusto-data/azure/kusto/data/_converters.py b/azure-kusto-data/azure/kusto/data/_converters.py index a5d8f504..30f14bc8 100644 --- a/azure-kusto-data/azure/kusto/data/_converters.py +++ b/azure-kusto-data/azure/kusto/data/_converters.py @@ -3,7 +3,6 @@ from datetime import timedelta import re from dateutil import parser -from dateutil.tz import UTC import six # Regex for TimeSpan diff --git a/azure-kusto-data/azure/kusto/data/_models.py b/azure-kusto-data/azure/kusto/data/_models.py index 9a65ba8a..6846911d 100644 --- a/azure-kusto-data/azure/kusto/data/_models.py +++ b/azure-kusto-data/azure/kusto/data/_models.py @@ -9,13 +9,13 @@ from .exceptions import KustoServiceError -has_pandas = True +HAS_PANDAS = True try: import pandas from .helpers import to_pandas_datetime, to_pandas_timedelta -except: - has_pandas = False +except ImportError: + HAS_PANDAS = False class WellKnownDataSet(Enum): @@ -44,14 +44,14 @@ def __init__(self, columns, row): except AttributeError: self._value_by_index.append(value) self._value_by_name[columns[i]] = value - if has_pandas: + if HAS_PANDAS: self._hidden_values.append(value) continue if column_type in ["datetime", "timespan"]: if value is None: typed_value = None - if has_pandas: + if HAS_PANDAS: self._hidden_values.append(None) else: # If you are here to read this, you probably hit some datetime/timedelta inconsistencies. @@ -62,18 +62,18 @@ def __init__(self, columns, row): # this is a special case where plain python will lose precision, so we keep the precise value hidden # when transforming to pandas, we can use the hidden value to convert to precise pandas/numpy types - if has_pandas: + if HAS_PANDAS: if column_type == "datetime": self._hidden_values.append(to_pandas_datetime(value)) if column_type == "timespan": self._hidden_values.append(to_pandas_timedelta(value, typed_value)) elif column_type in KustoResultRow.convertion_funcs: typed_value = KustoResultRow.convertion_funcs[column_type](value) - if has_pandas: + if HAS_PANDAS: self._hidden_values.append(value) else: typed_value = value - if has_pandas: + if HAS_PANDAS: self._hidden_values.append(value) self._value_by_index.append(typed_value) diff --git a/azure-kusto-data/azure/kusto/data/helpers.py b/azure-kusto-data/azure/kusto/data/helpers.py index 71827271..29e9368e 100644 --- a/azure-kusto-data/azure/kusto/data/helpers.py +++ b/azure-kusto-data/azure/kusto/data/helpers.py @@ -35,7 +35,7 @@ def dataframe_from_result_table(table): from dateutil.tz import UTC """Converts Kusto tables into pandas DataFrame. - :param azure.kusto.data._models.KustoResultTable table: Table received from the response. + :param azure.kusto.data._models.KustoResultTable table: Table received from the response. :return: pandas DataFrame. :rtype: pandas.DataFrame """ From 1ccac75145c20c22c2ecac7f37aab0e59ed23166 Mon Sep 17 00:00:00 2001 From: Daniel Dubovski Date: Mon, 15 Apr 2019 10:55:21 +0300 Subject: [PATCH 13/13] removed newline --- azure-kusto-data/azure/kusto/data/_converters.py | 1 - 1 file changed, 1 deletion(-) diff --git a/azure-kusto-data/azure/kusto/data/_converters.py b/azure-kusto-data/azure/kusto/data/_converters.py index 30f14bc8..5dea612a 100644 --- a/azure-kusto-data/azure/kusto/data/_converters.py +++ b/azure-kusto-data/azure/kusto/data/_converters.py @@ -16,7 +16,6 @@ def to_datetime(value): if isinstance(value, six.integer_types): return parser.parse(value) - return parser.isoparse(value)