From b47fefdda6297b914f19fb1ee2ec8bab0c1d6f71 Mon Sep 17 00:00:00 2001 From: eutwt <11261404+eutwt@users.noreply.github.com> Date: Sun, 28 Dec 2025 13:54:02 -0500 Subject: [PATCH 01/10] Inline relation SQL instead of input views --- python/versus/comparison/_compute.py | 4 +- python/versus/comparison/_helpers.py | 98 +++++++++++++----------- python/versus/comparison/_slices.py | 4 +- python/versus/comparison/_value_diffs.py | 8 +- python/versus/comparison/_weave.py | 8 +- 5 files changed, 66 insertions(+), 56 deletions(-) diff --git a/python/versus/comparison/_compute.py b/python/versus/comparison/_compute.py index 49214e0..7efb59c 100644 --- a/python/versus/comparison/_compute.py +++ b/python/versus/comparison/_compute.py @@ -263,8 +263,8 @@ def key_part(identifier: str) -> str: {h.sql_literal(identifier)} AS table_name, {select_by} FROM - {h.ident(handle_left.name)} AS left_tbl - ANTI JOIN {h.ident(handle_right.name)} AS right_tbl + {h.table_ref(handle_left)} AS left_tbl + ANTI JOIN {h.table_ref(handle_right)} AS right_tbl ON {condition} """ diff --git a/python/versus/comparison/_helpers.py b/python/versus/comparison/_helpers.py index 44c8f28..8efc92d 100644 --- a/python/versus/comparison/_helpers.py +++ b/python/versus/comparison/_helpers.py @@ -34,6 +34,8 @@ class _TableHandle: relation: duckdb.DuckDBPyRelation columns: List[str] types: Dict[str, str] + source_sql: str + source_is_identifier: bool def __getattr__(self, name: str) -> Any: return getattr(self.relation, name) @@ -200,7 +202,7 @@ def assert_unique_by( {cols}, COUNT(*) AS n FROM - {ident(handle.name)} AS t + {table_ref(handle)} AS t GROUP BY {cols} HAVING @@ -309,40 +311,13 @@ def register_input_view( ) -> _TableHandle: name = f"__versus_{label}_{uuid.uuid4().hex}" display = "relation" - base_name = None - relation_source = False if isinstance(source, duckdb.DuckDBPyRelation): - relation_source = True - base_name = f"{name}_base" validate_columns(source.columns, label) - source.to_view(base_name, replace=True) - source_ref = ident(base_name) + source_sql = source.sql_query() display = getattr(source, "alias", "relation") - elif isinstance(source, str): - raise ComparisonError( - "String inputs are not supported. Pass a DuckDB relation or pandas/polars " - "DataFrame." - ) - else: - base_name = f"{name}_base" - source_columns = getattr(source, "columns", None) - if source_columns is not None: - validate_columns(list(source_columns), label) try: - conn.register(base_name, source) - except Exception as exc: - raise ComparisonError( - "Inputs must be DuckDB relations or pandas/polars DataFrames." - ) from exc - source_ref = ident(base_name) - display = type(source).__name__ - - try: - conn.execute( - f"CREATE OR REPLACE TEMP VIEW {ident(name)} AS SELECT * FROM {source_ref}" - ) - except duckdb.Error as exc: - if relation_source and base_name is not None and base_name in str(exc): + columns, types = describe_source(conn, source_sql, is_identifier=False) + except duckdb.Error as exc: arg_name = f"table_{label}" if connection_supplied: hint = ( @@ -357,24 +332,53 @@ def register_input_view( "`connection=...`." ) raise ComparisonError(hint) from exc - raise - if base_name is not None: - conn.versus.views.append(base_name) + relation = conn.sql(source_sql) + return _TableHandle( + name=name, + display=display, + relation=relation, + columns=columns, + types=types, + source_sql=source_sql, + source_is_identifier=False, + ) + if isinstance(source, str): + raise ComparisonError( + "String inputs are not supported. Pass a DuckDB relation or pandas/polars " + "DataFrame." + ) + source_columns = getattr(source, "columns", None) + if source_columns is not None: + validate_columns(list(source_columns), label) + try: + conn.register(name, source) + except Exception as exc: + raise ComparisonError( + "Inputs must be DuckDB relations or pandas/polars DataFrames." + ) from exc conn.versus.views.append(name) - - columns, types = describe_view(conn, name) + source_sql = name + columns, types = describe_source(conn, source_sql, is_identifier=True) relation = conn.table(name) return _TableHandle( name=name, - display=display, + display=type(source).__name__, relation=relation, columns=columns, types=types, + source_sql=source_sql, + source_is_identifier=True, ) -def describe_view(conn: VersusConn, name: str) -> Tuple[List[str], Dict[str, str]]: - rel = run_sql(conn, f"DESCRIBE SELECT * FROM {ident(name)}") +def describe_source( + conn: VersusConn, + source_sql: str, + *, + is_identifier: bool, +) -> Tuple[List[str], Dict[str, str]]: + source_ref = ident(source_sql) if is_identifier else f"({source_sql})" + rel = run_sql(conn, f"DESCRIBE SELECT * FROM {source_ref}") rows = rel.fetchall() columns = [row[0] for row in rows] types = {row[0]: row[1] for row in rows} @@ -391,6 +395,12 @@ def col(alias: str, column: str) -> str: return f"{alias}.{ident(column)}" +def table_ref(handle: _TableHandle) -> str: + if handle.source_is_identifier: + return ident(handle.source_sql) + return f"({handle.source_sql})" + + def select_cols(columns: Sequence[str], alias: Optional[str] = None) -> str: if not columns: raise ComparisonError("Column list must be non-empty") @@ -414,8 +424,8 @@ def inputs_join_sql( ) -> str: join_condition_sql = join_condition(by_columns, "a", "b") return ( - f"{ident(handles[table_id[0]].name)} AS a\n" - f" INNER JOIN {ident(handles[table_id[1]].name)} AS b\n" + f"{table_ref(handles[table_id[0]])} AS a\n" + f" INNER JOIN {table_ref(handles[table_id[1]])} AS b\n" f" ON {join_condition_sql}" ) @@ -484,7 +494,7 @@ def fetch_rows_by_keys( {select_cols_sql} FROM ({key_sql}) AS keys - JOIN {ident(comparison._handles[table].name)} AS base + JOIN {table_ref(comparison._handles[table])} AS base ON {join_condition_sql} """ return run_sql(comparison.connection, sql) @@ -580,10 +590,10 @@ def select_zero_from_table( ) -> duckdb.DuckDBPyRelation: handle = comparison._handles[table] if columns is None: - sql = f"SELECT * FROM {ident(handle.name)} LIMIT 0" + sql = f"SELECT * FROM {table_ref(handle)} LIMIT 0" return run_sql(comparison.connection, sql) if not columns: raise ComparisonError("Column list must be non-empty") select_cols_sql = select_cols(columns) - sql = f"SELECT {select_cols_sql} FROM {ident(handle.name)} LIMIT 0" + sql = f"SELECT {select_cols_sql} FROM {table_ref(handle)} LIMIT 0" return run_sql(comparison.connection, sql) diff --git a/python/versus/comparison/_slices.py b/python/versus/comparison/_slices.py index bf9580c..74e443a 100644 --- a/python/versus/comparison/_slices.py +++ b/python/versus/comparison/_slices.py @@ -85,13 +85,13 @@ def slice_unmatched_both(comparison: "Comparison") -> duckdb.DuckDBPyRelation: def select_for(table_name: str) -> str: unmatched_keys_sql = build_unmatched_keys_sql(comparison, table_name) - base_table = comparison._handles[table_name].name + base_table = comparison._handles[table_name] return f""" SELECT {h.sql_literal(table_name)} AS table_name, {select_cols} FROM - {h.ident(base_table)} AS base + {h.table_ref(base_table)} AS base JOIN ({unmatched_keys_sql}) AS keys ON {join_condition} """ diff --git a/python/versus/comparison/_value_diffs.py b/python/versus/comparison/_value_diffs.py index 61f8f2c..b44756c 100644 --- a/python/versus/comparison/_value_diffs.py +++ b/python/versus/comparison/_value_diffs.py @@ -60,9 +60,9 @@ def _value_diffs_with_diff_table( {", ".join(select_cols)} FROM ({key_sql}) AS keys - JOIN {h.ident(comparison._handles[table_a].name)} AS a + JOIN {h.table_ref(comparison._handles[table_a])} AS a ON {join_a} - JOIN {h.ident(comparison._handles[table_b].name)} AS b + JOIN {h.table_ref(comparison._handles[table_b])} AS b ON {join_b} """ return h.run_sql(comparison.connection, sql) @@ -112,9 +112,9 @@ def stack_value_diffs_sql( {", ".join(select_parts)} FROM ({key_sql}) AS keys - JOIN {h.ident(comparison._handles[table_a].name)} AS a + JOIN {h.table_ref(comparison._handles[table_a])} AS a ON {join_a} - JOIN {h.ident(comparison._handles[table_b].name)} AS b + JOIN {h.table_ref(comparison._handles[table_b])} AS b ON {join_b} """ diff --git a/python/versus/comparison/_weave.py b/python/versus/comparison/_weave.py index 4598106..9135194 100644 --- a/python/versus/comparison/_weave.py +++ b/python/versus/comparison/_weave.py @@ -92,9 +92,9 @@ def _weave_diffs_wide_with_keys( {", ".join(select_parts)} FROM ({keys}) AS keys - JOIN {h.ident(comparison._handles[table_a].name)} AS a + JOIN {h.table_ref(comparison._handles[table_a])} AS a ON {join_a} - JOIN {h.ident(comparison._handles[table_b].name)} AS b + JOIN {h.table_ref(comparison._handles[table_b])} AS b ON {join_b} """ return h.run_sql(comparison.connection, sql) @@ -153,7 +153,7 @@ def _weave_diffs_long_with_keys( {select_cols_a} FROM keys - JOIN {h.ident(comparison._handles[table_a].name)} AS a + JOIN {h.table_ref(comparison._handles[table_a])} AS a ON {join_a} UNION ALL SELECT @@ -162,7 +162,7 @@ def _weave_diffs_long_with_keys( {select_cols_b} FROM keys - JOIN {h.ident(comparison._handles[table_b].name)} AS b + JOIN {h.table_ref(comparison._handles[table_b])} AS b ON {join_b} ) AS stacked ORDER BY From a2fc4ca738659e657f8801f339bb9e3967d88921 Mon Sep 17 00:00:00 2001 From: eutwt <11261404+eutwt@users.noreply.github.com> Date: Sun, 28 Dec 2025 13:55:04 -0500 Subject: [PATCH 02/10] Use dataframe row counts for table summary --- python/versus/comparison/_helpers.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/python/versus/comparison/_helpers.py b/python/versus/comparison/_helpers.py index 8efc92d..c9dae26 100644 --- a/python/versus/comparison/_helpers.py +++ b/python/versus/comparison/_helpers.py @@ -36,6 +36,7 @@ class _TableHandle: types: Dict[str, str] source_sql: str source_is_identifier: bool + row_count: Optional[int] def __getattr__(self, name: str) -> Any: return getattr(self.relation, name) @@ -341,6 +342,7 @@ def register_input_view( types=types, source_sql=source_sql, source_is_identifier=False, + row_count=None, ) if isinstance(source, str): raise ComparisonError( @@ -350,6 +352,7 @@ def register_input_view( source_columns = getattr(source, "columns", None) if source_columns is not None: validate_columns(list(source_columns), label) + row_count = infer_row_count(source) try: conn.register(name, source) except Exception as exc: @@ -368,6 +371,7 @@ def register_input_view( types=types, source_sql=source_sql, source_is_identifier=True, + row_count=row_count, ) @@ -385,6 +389,16 @@ def describe_source( return columns, types +def infer_row_count(source: Any) -> Optional[int]: + shape = getattr(source, "shape", None) + if isinstance(shape, tuple) and shape and isinstance(shape[0], int): + return shape[0] + height = getattr(source, "height", None) + if isinstance(height, int): + return height + return None + + # --------------- SQL builder helpers def ident(name: str) -> str: escaped = name.replace('"', '""') @@ -578,6 +592,8 @@ def build_rows_relation( def table_count(relation: Union[duckdb.DuckDBPyRelation, _TableHandle]) -> int: + if isinstance(relation, _TableHandle) and relation.row_count is not None: + return relation.row_count row = relation.count("*").fetchall()[0] assert isinstance(row[0], int) return row[0] From c66d52f4fe563726aff3f3d726398523ec371c15 Mon Sep 17 00:00:00 2001 From: eutwt <11261404+eutwt@users.noreply.github.com> Date: Sun, 28 Dec 2025 13:57:02 -0500 Subject: [PATCH 03/10] Probe relation connection without views --- python/versus/comparison/_helpers.py | 52 ++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 14 deletions(-) diff --git a/python/versus/comparison/_helpers.py b/python/versus/comparison/_helpers.py index c9dae26..485f82e 100644 --- a/python/versus/comparison/_helpers.py +++ b/python/versus/comparison/_helpers.py @@ -316,23 +316,11 @@ def register_input_view( validate_columns(source.columns, label) source_sql = source.sql_query() display = getattr(source, "alias", "relation") + assert_relation_connection(conn, source, label, connection_supplied) try: columns, types = describe_source(conn, source_sql, is_identifier=False) except duckdb.Error as exc: - arg_name = f"table_{label}" - if connection_supplied: - hint = ( - f"`{arg_name}` appears to be bound to a different DuckDB " - "connection than the one passed to `compare()`. Pass the same " - "connection that created the relations via `connection=...`." - ) - else: - hint = ( - f"`{arg_name}` appears to be bound to a non-default DuckDB " - "connection. Pass that connection to `compare()` via " - "`connection=...`." - ) - raise ComparisonError(hint) from exc + raise_relation_connection_error(label, connection_supplied, exc) relation = conn.sql(source_sql) return _TableHandle( name=name, @@ -399,6 +387,42 @@ def infer_row_count(source: Any) -> Optional[int]: return None +def raise_relation_connection_error( + label: str, + connection_supplied: bool, + exc: Exception, +) -> None: + arg_name = f"table_{label}" + if connection_supplied: + hint = ( + f"`{arg_name}` appears to be bound to a different DuckDB " + "connection than the one passed to `compare()`. Pass the same " + "connection that created the relations via `connection=...`." + ) + else: + hint = ( + f"`{arg_name}` appears to be bound to a non-default DuckDB " + "connection. Pass that connection to `compare()` via " + "`connection=...`." + ) + raise ComparisonError(hint) from exc + + +def assert_relation_connection( + conn: VersusConn, + relation: duckdb.DuckDBPyRelation, + label: str, + connection_supplied: bool, +) -> None: + probe_name = f"__versus_probe_{uuid.uuid4().hex}" + try: + conn.register(probe_name, relation) + except Exception as exc: + raise_relation_connection_error(label, connection_supplied, exc) + else: + conn.unregister(probe_name) + + # --------------- SQL builder helpers def ident(name: str) -> str: escaped = name.replace('"', '""') From 3ac60f394fc57553432b1efc1335cd5f3b63b9d6 Mon Sep 17 00:00:00 2001 From: eutwt <11261404+eutwt@users.noreply.github.com> Date: Sun, 28 Dec 2025 14:04:32 -0500 Subject: [PATCH 04/10] Centralize row count resolution --- python/versus/comparison/_helpers.py | 44 ++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/python/versus/comparison/_helpers.py b/python/versus/comparison/_helpers.py index 485f82e..bb69b09 100644 --- a/python/versus/comparison/_helpers.py +++ b/python/versus/comparison/_helpers.py @@ -36,7 +36,7 @@ class _TableHandle: types: Dict[str, str] source_sql: str source_is_identifier: bool - row_count: Optional[int] + row_count: int def __getattr__(self, name: str) -> Any: return getattr(self.relation, name) @@ -321,6 +321,7 @@ def register_input_view( columns, types = describe_source(conn, source_sql, is_identifier=False) except duckdb.Error as exc: raise_relation_connection_error(label, connection_supplied, exc) + row_count = resolve_row_count(conn, source, source_sql, is_identifier=False) relation = conn.sql(source_sql) return _TableHandle( name=name, @@ -330,7 +331,7 @@ def register_input_view( types=types, source_sql=source_sql, source_is_identifier=False, - row_count=None, + row_count=row_count, ) if isinstance(source, str): raise ComparisonError( @@ -340,7 +341,6 @@ def register_input_view( source_columns = getattr(source, "columns", None) if source_columns is not None: validate_columns(list(source_columns), label) - row_count = infer_row_count(source) try: conn.register(name, source) except Exception as exc: @@ -350,6 +350,7 @@ def register_input_view( conn.versus.views.append(name) source_sql = name columns, types = describe_source(conn, source_sql, is_identifier=True) + row_count = resolve_row_count(conn, source, source_sql, is_identifier=True) relation = conn.table(name) return _TableHandle( name=name, @@ -369,7 +370,7 @@ def describe_source( *, is_identifier: bool, ) -> Tuple[List[str], Dict[str, str]]: - source_ref = ident(source_sql) if is_identifier else f"({source_sql})" + source_ref = source_ref_for_sql(source_sql, is_identifier) rel = run_sql(conn, f"DESCRIBE SELECT * FROM {source_ref}") rows = rel.fetchall() columns = [row[0] for row in rows] @@ -377,13 +378,32 @@ def describe_source( return columns, types -def infer_row_count(source: Any) -> Optional[int]: - shape = getattr(source, "shape", None) - if isinstance(shape, tuple) and shape and isinstance(shape[0], int): - return shape[0] - height = getattr(source, "height", None) - if isinstance(height, int): - return height +def source_ref_for_sql(source_sql: str, is_identifier: bool) -> str: + return ident(source_sql) if is_identifier else f"({source_sql})" + + +def resolve_row_count( + conn: VersusConn, + source: Any, + source_sql: str, + *, + is_identifier: bool, +) -> int: + frame_row_count = row_count_from_frame(source) + if frame_row_count is not None: + return frame_row_count + source_ref = source_ref_for_sql(source_sql, is_identifier) + row = run_sql(conn, f"SELECT COUNT(*) FROM {source_ref}").fetchone() + assert row is not None and isinstance(row[0], int) + return row[0] + + +def row_count_from_frame(source: Any) -> Optional[int]: + module = type(source).__module__ + if module.startswith("pandas"): + return int(source.shape[0]) + if module.startswith("polars"): + return int(source.height) return None @@ -616,7 +636,7 @@ def build_rows_relation( def table_count(relation: Union[duckdb.DuckDBPyRelation, _TableHandle]) -> int: - if isinstance(relation, _TableHandle) and relation.row_count is not None: + if isinstance(relation, _TableHandle): return relation.row_count row = relation.count("*").fetchall()[0] assert isinstance(row[0], int) From 8e704cb44362ea95e4b5b0a164c8c151fa76edb9 Mon Sep 17 00:00:00 2001 From: eutwt <11261404+eutwt@users.noreply.github.com> Date: Sun, 28 Dec 2025 14:06:48 -0500 Subject: [PATCH 05/10] Rename input handle builder and tighten types --- python/versus/comparison/_core.py | 4 ++-- python/versus/comparison/_helpers.py | 16 ++++++++++++---- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/python/versus/comparison/_core.py b/python/versus/comparison/_core.py index 3b12f24..4d2d506 100644 --- a/python/versus/comparison/_core.py +++ b/python/versus/comparison/_core.py @@ -509,10 +509,10 @@ def compare( by_columns = h.normalize_column_list(by, "by", allow_empty=False) connection_supplied = connection is not None handles = { - clean_ids[0]: h.register_input_view( + clean_ids[0]: h.build_table_handle( conn, table_a, clean_ids[0], connection_supplied=connection_supplied ), - clean_ids[1]: h.register_input_view( + clean_ids[1]: h.build_table_handle( conn, table_b, clean_ids[1], connection_supplied=connection_supplied ), } diff --git a/python/versus/comparison/_helpers.py b/python/versus/comparison/_helpers.py index bb69b09..e150b67 100644 --- a/python/versus/comparison/_helpers.py +++ b/python/versus/comparison/_helpers.py @@ -303,9 +303,11 @@ def assert_column_allowed(comparison: "Comparison", column: str, func: str) -> N # --------------- Input registration and metadata -def register_input_view( +def build_table_handle( conn: VersusConn, - source: Any, + source: Union[ + duckdb.DuckDBPyRelation, "pandas.DataFrame", "polars.DataFrame" + ], label: str, *, connection_supplied: bool, @@ -384,7 +386,9 @@ def source_ref_for_sql(source_sql: str, is_identifier: bool) -> str: def resolve_row_count( conn: VersusConn, - source: Any, + source: Union[ + duckdb.DuckDBPyRelation, "pandas.DataFrame", "polars.DataFrame" + ], source_sql: str, *, is_identifier: bool, @@ -398,7 +402,11 @@ def resolve_row_count( return row[0] -def row_count_from_frame(source: Any) -> Optional[int]: +def row_count_from_frame( + source: Union[ + duckdb.DuckDBPyRelation, "pandas.DataFrame", "polars.DataFrame" + ], +) -> Optional[int]: module = type(source).__module__ if module.startswith("pandas"): return int(source.shape[0]) From 2c74c135f42c19543953b6fa17cfbdc3e111769b Mon Sep 17 00:00:00 2001 From: eutwt <11261404+eutwt@users.noreply.github.com> Date: Sun, 28 Dec 2025 14:07:52 -0500 Subject: [PATCH 06/10] Type-check pandas/polars annotations --- python/versus/comparison/_helpers.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/versus/comparison/_helpers.py b/python/versus/comparison/_helpers.py index e150b67..6b1cbcf 100644 --- a/python/versus/comparison/_helpers.py +++ b/python/versus/comparison/_helpers.py @@ -23,6 +23,9 @@ from ._exceptions import ComparisonError if TYPE_CHECKING: # pragma: no cover + import pandas + import polars + from ._core import Comparison From c1c154a30d3a88e865aeae2a3b4c5ccb2f747c3c Mon Sep 17 00:00:00 2001 From: eutwt <11261404+eutwt@users.noreply.github.com> Date: Sun, 28 Dec 2025 14:08:38 -0500 Subject: [PATCH 07/10] Format type annotations --- python/versus/comparison/_helpers.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/python/versus/comparison/_helpers.py b/python/versus/comparison/_helpers.py index 6b1cbcf..8ccbbc0 100644 --- a/python/versus/comparison/_helpers.py +++ b/python/versus/comparison/_helpers.py @@ -308,9 +308,7 @@ def assert_column_allowed(comparison: "Comparison", column: str, func: str) -> N # --------------- Input registration and metadata def build_table_handle( conn: VersusConn, - source: Union[ - duckdb.DuckDBPyRelation, "pandas.DataFrame", "polars.DataFrame" - ], + source: Union[duckdb.DuckDBPyRelation, "pandas.DataFrame", "polars.DataFrame"], label: str, *, connection_supplied: bool, @@ -389,9 +387,7 @@ def source_ref_for_sql(source_sql: str, is_identifier: bool) -> str: def resolve_row_count( conn: VersusConn, - source: Union[ - duckdb.DuckDBPyRelation, "pandas.DataFrame", "polars.DataFrame" - ], + source: Union[duckdb.DuckDBPyRelation, "pandas.DataFrame", "polars.DataFrame"], source_sql: str, *, is_identifier: bool, @@ -406,9 +402,7 @@ def resolve_row_count( def row_count_from_frame( - source: Union[ - duckdb.DuckDBPyRelation, "pandas.DataFrame", "polars.DataFrame" - ], + source: Union[duckdb.DuckDBPyRelation, "pandas.DataFrame", "polars.DataFrame"], ) -> Optional[int]: module = type(source).__module__ if module.startswith("pandas"): From fedd7ce27c052716152805bcf69c8e21b2d9657d Mon Sep 17 00:00:00 2001 From: eutwt <11261404+eutwt@users.noreply.github.com> Date: Sun, 28 Dec 2025 14:09:33 -0500 Subject: [PATCH 08/10] Type-narrow dataframe row counts --- python/versus/comparison/_helpers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/versus/comparison/_helpers.py b/python/versus/comparison/_helpers.py index 8ccbbc0..b4ae233 100644 --- a/python/versus/comparison/_helpers.py +++ b/python/versus/comparison/_helpers.py @@ -406,9 +406,9 @@ def row_count_from_frame( ) -> Optional[int]: module = type(source).__module__ if module.startswith("pandas"): - return int(source.shape[0]) + return int(cast("pandas.DataFrame", source).shape[0]) if module.startswith("polars"): - return int(source.height) + return int(cast("polars.DataFrame", source).height) return None From b10c8aaa6b240220158519c870b84437a06a2d8e Mon Sep 17 00:00:00 2001 From: eutwt <11261404+eutwt@users.noreply.github.com> Date: Sun, 28 Dec 2025 14:11:23 -0500 Subject: [PATCH 09/10] Add internal input type alias --- python/versus/comparison/_helpers.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/python/versus/comparison/_helpers.py b/python/versus/comparison/_helpers.py index b4ae233..bc656c0 100644 --- a/python/versus/comparison/_helpers.py +++ b/python/versus/comparison/_helpers.py @@ -28,6 +28,13 @@ from ._core import Comparison +try: + from typing import TypeAlias +except ImportError: # pragma: no cover - Python < 3.10 + from typing_extensions import TypeAlias + +_Input: TypeAlias = Union[duckdb.DuckDBPyRelation, "pandas.DataFrame", "polars.DataFrame"] + # --------------- Data structures @dataclass @@ -308,7 +315,7 @@ def assert_column_allowed(comparison: "Comparison", column: str, func: str) -> N # --------------- Input registration and metadata def build_table_handle( conn: VersusConn, - source: Union[duckdb.DuckDBPyRelation, "pandas.DataFrame", "polars.DataFrame"], + source: _Input, label: str, *, connection_supplied: bool, @@ -387,7 +394,7 @@ def source_ref_for_sql(source_sql: str, is_identifier: bool) -> str: def resolve_row_count( conn: VersusConn, - source: Union[duckdb.DuckDBPyRelation, "pandas.DataFrame", "polars.DataFrame"], + source: _Input, source_sql: str, *, is_identifier: bool, @@ -401,9 +408,7 @@ def resolve_row_count( return row[0] -def row_count_from_frame( - source: Union[duckdb.DuckDBPyRelation, "pandas.DataFrame", "polars.DataFrame"], -) -> Optional[int]: +def row_count_from_frame(source: _Input) -> Optional[int]: module = type(source).__module__ if module.startswith("pandas"): return int(cast("pandas.DataFrame", source).shape[0]) From fd4ecac0fbce58738035e624fe746a86e826f84f Mon Sep 17 00:00:00 2001 From: eutwt <11261404+eutwt@users.noreply.github.com> Date: Sun, 28 Dec 2025 14:11:50 -0500 Subject: [PATCH 10/10] Format input type alias --- python/versus/comparison/_helpers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/versus/comparison/_helpers.py b/python/versus/comparison/_helpers.py index bc656c0..9705ae1 100644 --- a/python/versus/comparison/_helpers.py +++ b/python/versus/comparison/_helpers.py @@ -33,7 +33,9 @@ except ImportError: # pragma: no cover - Python < 3.10 from typing_extensions import TypeAlias -_Input: TypeAlias = Union[duckdb.DuckDBPyRelation, "pandas.DataFrame", "polars.DataFrame"] +_Input: TypeAlias = Union[ + duckdb.DuckDBPyRelation, "pandas.DataFrame", "polars.DataFrame" +] # --------------- Data structures