diff --git a/AGENTS.md b/AGENTS.md index 5306d6f..cb75a35 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -23,9 +23,9 @@ future contributors can work without hunting through old context. ## Key concepts - `compare()` accepts DuckDB relations or pandas/polars DataFrames. The - optional `connection` parameter must be provided when the relations were - created on non-default connections so subsequent helper queries run in the - correct database. + optional `con` parameter must be provided when the relations were created + on non-default connections so subsequent helper queries run in the correct + database. - The `Comparison` object stores: - table metadata (`tables`, `by`, `unmatched_cols`, `intersection`) - internal handles to the temp views plus a mapping of column name to diff --git a/README.md b/README.md index f74e18a..c060e7d 100644 --- a/README.md +++ b/README.md @@ -221,8 +221,8 @@ comparison.summary() ## Usage - Call `compare()` with DuckDB relations or pandas/polars DataFrames. If - your relations live on a custom DuckDB connection, pass it via - `connection=` so the comparison queries use the same database. + your relations live on a custom DuckDB connection, pass it via `con=` + so the comparison queries use the same database. - The `by` columns must uniquely identify rows in each table. When they do not, `compare()` raises `ComparisonError` and tells you which key values repeat. diff --git a/README.qmd b/README.qmd index 82f02fb..7a81a6b 100644 --- a/README.qmd +++ b/README.qmd @@ -103,8 +103,8 @@ comparison.summary() ## Usage - Call `compare()` with DuckDB relations or pandas/polars DataFrames. If - your relations live on a custom DuckDB connection, pass it via - `connection=` so the comparison queries use the same database. + your relations live on a custom DuckDB connection, pass it via `con=` + so the comparison queries use the same database. - The `by` columns must uniquely identify rows in each table. When they do not, `compare()` raises `ComparisonError` and tells you which key values repeat. diff --git a/docs_api/source/getting-started.rst b/docs_api/source/getting-started.rst index c1dc668..1c2e098 100644 --- a/docs_api/source/getting-started.rst +++ b/docs_api/source/getting-started.rst @@ -17,8 +17,8 @@ Inputs `compare()` accepts DuckDB relations (tables or views) or pandas/polars DataFrames. If you provide relations created on a non-default connection, -pass that connection into `compare()` so helper queries run in the same -session. +pass that connection into `compare()` via `con=` so helper queries run in +the same session. .. code-block:: pycon diff --git a/python/versus/comparison/_inputs.py b/python/versus/comparison/_inputs.py index 793e6e7..c098145 100644 --- a/python/versus/comparison/_inputs.py +++ b/python/versus/comparison/_inputs.py @@ -155,13 +155,12 @@ def raise_relation_connection_error( hint = ( f"`{arg_name}` appears to be bound to a different DuckDB " "connection than the one passed to `compare()`. Pass the same " - "connection that created the relations via `connection=...`." + "connection that created the relations via `con=...`." ) else: hint = ( f"`{arg_name}` appears to be bound to a non-default DuckDB " - "connection. Pass that connection to `compare()` via " - "`connection=...`." + "connection. Pass that connection to `compare()` via `con=...`." ) raise ComparisonError(hint) from exc diff --git a/python/versus/comparison/_validation.py b/python/versus/comparison/_validation.py index 8a7f01c..3f63c56 100644 --- a/python/versus/comparison/_validation.py +++ b/python/versus/comparison/_validation.py @@ -34,7 +34,7 @@ def resolve_connection( default_conn = duckdb.default_connection conn_candidate = default_conn() if callable(default_conn) else default_conn if not isinstance(conn_candidate, duckdb.DuckDBPyConnection): - raise ComparisonError("`connection` must be a DuckDB connection.") + raise ComparisonError("`con` must be a DuckDB connection.") return VersusConn(conn_candidate) diff --git a/python/versus/comparison/api.py b/python/versus/comparison/api.py index 3d81365..7949a82 100644 --- a/python/versus/comparison/api.py +++ b/python/versus/comparison/api.py @@ -27,7 +27,7 @@ def compare( allow_both_na: bool = True, coerce: bool = True, table_id: Tuple[str, str] = ("a", "b"), - connection: Optional[duckdb.DuckDBPyConnection] = None, + con: Optional[duckdb.DuckDBPyConnection] = None, materialize: Literal["all", "summary", "none"] = "all", ) -> Comparison: """Compare two DuckDB relations by key columns. @@ -45,7 +45,7 @@ def compare( exact type matches for shared columns. table_id : tuple[str, str], default ("a", "b") Labels used in outputs for the two tables. - connection : duckdb.DuckDBPyConnection, optional + con : duckdb.DuckDBPyConnection, optional DuckDB connection used to register the inputs and run queries. materialize : {"all", "summary", "none"}, default "all" Controls which helper tables are materialized upfront. @@ -76,16 +76,16 @@ def compare( """ materialize_summary, materialize_keys = v.resolve_materialize(materialize) - conn = v.resolve_connection(connection) + conn = v.resolve_connection(con) clean_ids = v.validate_table_id(table_id) by_columns = v.normalize_column_list(by, "by", allow_empty=False) - connection_supplied = connection is not None + con_supplied = con is not None handles = { clean_ids[0]: i.build_table_handle( - conn, table_a, clean_ids[0], connection_supplied=connection_supplied + conn, table_a, clean_ids[0], connection_supplied=con_supplied ), clean_ids[1]: i.build_table_handle( - conn, table_b, clean_ids[1], connection_supplied=connection_supplied + conn, table_b, clean_ids[1], connection_supplied=con_supplied ), } v.validate_tables(conn, handles, clean_ids, by_columns, coerce=coerce) diff --git a/tests/test_compare.py b/tests/test_compare.py index 8a6efb7..537a775 100644 --- a/tests/test_compare.py +++ b/tests/test_compare.py @@ -63,7 +63,7 @@ def comparison_from_sql(sql_a: str, sql_b: str, *, by, **kwargs): con = duckdb.connect() rel_a = con.sql(sql_a) rel_b = con.sql(sql_b) - return compare(rel_a, rel_b, by=by, connection=con, **kwargs) + return compare(rel_a, rel_b, by=by, con=con, **kwargs) def identical_comparison(): @@ -82,7 +82,7 @@ def identical_comparison(): def test_compare_summary(): con, rel_a, rel_b = build_connection() - comp = compare(rel_a, rel_b, by=["id"], connection=con) + comp = compare(rel_a, rel_b, by=["id"], con=con) assert rel_values(comp.tables, "nrow") == [3, 3] value_row = rel_dicts(comp.intersection.filter("\"column\" = 'value'"))[0] assert value_row["n_diffs"] == 1 @@ -90,7 +90,7 @@ def test_compare_summary(): def test_inputs_property_exposes_relations(): con, rel_a, rel_b = build_connection() - comp = compare(rel_a, rel_b, by=["id"], connection=con) + comp = compare(rel_a, rel_b, by=["id"], con=con) inputs = comp.inputs assert isinstance(inputs, dict) assert "a" in inputs and "b" in inputs @@ -104,7 +104,7 @@ def test_compare_accepts_pandas_polars_frames(): con = duckdb.connect() df_a = pandas.DataFrame({"id": [1, 2], "value": [10, 20]}) df_b = polars.DataFrame({"id": [1, 2], "value": [10, 22]}) - comp = compare(df_a, df_b, by=["id"], connection=con) + comp = compare(df_a, df_b, by=["id"], con=con) value_row = rel_dicts(comp.intersection.filter("\"column\" = 'value'"))[0] assert value_row["n_diffs"] == 1 comp.close() @@ -113,7 +113,7 @@ def test_compare_accepts_pandas_polars_frames(): def test_value_diffs_and_slice(): con, rel_a, rel_b = build_connection() - comp = compare(rel_a, rel_b, by=["id"], connection=con) + comp = compare(rel_a, rel_b, by=["id"], con=con) diffs = comp.value_diffs("value") assert rel_first(diffs, "id") == 2 rows = comp.slice_diffs("a", ["value"]) @@ -122,14 +122,14 @@ def test_value_diffs_and_slice(): def test_weave_wide(): con, rel_a, rel_b = build_connection() - comp = compare(rel_a, rel_b, by=["id"], connection=con) + comp = compare(rel_a, rel_b, by=["id"], con=con) wide = comp.weave_diffs_wide(["value"]) assert "value_a" in wide.columns and "value_b" in wide.columns def test_slice_unmatched(): con, rel_a, rel_b = build_connection() - comp = compare(rel_a, rel_b, by=["id"], connection=con) + comp = compare(rel_a, rel_b, by=["id"], con=con) unmatched = comp.slice_unmatched("a") assert rel_first(unmatched, "id") == 1 comp.close() @@ -141,7 +141,7 @@ def test_compare_accepts_dataframes(module_name): df_a = module.DataFrame({"id": [1, 2, 3], "value": [10, 20, 30]}) df_b = module.DataFrame({"id": [2, 3, 4], "value": [22, 30, 40]}) con = duckdb.connect() - comp = compare(df_a, df_b, by=["id"], connection=con) + comp = compare(df_a, df_b, by=["id"], con=con) diffs = comp.value_diffs("value") assert rel_first(diffs, "id") == 2 comp.close() @@ -151,7 +151,7 @@ def test_compare_accepts_dataframes(module_name): @pytest.mark.parametrize("materialize", ["all", "summary", "none"]) def test_materialize_modes_helpers(materialize): con, rel_a, rel_b = build_connection() - comp = compare(rel_a, rel_b, by=["id"], connection=con, materialize=materialize) + comp = compare(rel_a, rel_b, by=["id"], con=con, materialize=materialize) assert rel_values(comp.tables, "nrow") == [3, 3] diffs_row = rel_dicts(comp.intersection.filter("\"column\" = 'value'"))[0] assert diffs_row["n_diffs"] == 1 @@ -183,7 +183,7 @@ def test_materialize_modes_helpers(materialize): ) def test_materialize_modes_state(materialize, summary_materialized, has_diff_table): con, rel_a, rel_b = build_connection() - comp = compare(rel_a, rel_b, by=["id"], connection=con, materialize=materialize) + comp = compare(rel_a, rel_b, by=["id"], con=con, materialize=materialize) assert comp.intersection.materialized is summary_materialized assert comp.unmatched_rows.materialized is summary_materialized assert (comp.diff_table is not None) is has_diff_table @@ -223,7 +223,7 @@ def test_summary_reports_difference_categories(): ) AS t(id, value, note) """ ) - comp = compare(rel_a, rel_b, by=["id"], connection=con) + comp = compare(rel_a, rel_b, by=["id"], con=con) summary = comp.summary() assert summary.fetchall() == [ ("value_diffs", True), @@ -240,7 +240,7 @@ def test_summary_repr_shows_full_difference_labels(): examples.example_cars_a(con), examples.example_cars_b(con), by=["car"], - connection=con, + con=con, ) rendered = str(comp.summary()) assert "unmatched_cols" in rendered @@ -275,7 +275,7 @@ def test_duplicate_by_raises(): """ ) with pytest.raises(ComparisonError): - compare(rel_dup, rel_other, by=["id"], connection=con) + compare(rel_dup, rel_other, by=["id"], con=con) def test_examples_available(): @@ -284,7 +284,7 @@ def test_examples_available(): examples.example_cars_a(con), examples.example_cars_b(con), by=["car"], - connection=con, + con=con, ) assert rel_dicts(comp.intersection.filter("\"column\" = 'mpg'"))[0]["n_diffs"] == 2 comp.close() @@ -296,16 +296,16 @@ def test_compare_errors_when_by_column_missing(): rel_a = con.sql("SELECT 1 AS id, 10 AS value") rel_b = con.sql("SELECT 1 AS other_id, 10 AS value") with pytest.raises(ComparisonError): - compare(rel_a, rel_b, by=["id"], connection=con) + compare(rel_a, rel_b, by=["id"], con=con) def test_compare_errors_on_string_inputs(): con = duckdb.connect() rel = con.sql("SELECT 1 AS id") with pytest.raises(ComparisonError, match=r"String inputs are not supported"): - compare(cast(Any, "SELECT 1 AS id"), rel, by=["id"], connection=con) + compare(cast(Any, "SELECT 1 AS id"), rel, by=["id"], con=con) with pytest.raises(ComparisonError, match=r"String inputs are not supported"): - compare(rel, cast(Any, "SELECT 1 AS id"), by=["id"], connection=con) + compare(rel, cast(Any, "SELECT 1 AS id"), by=["id"], con=con) con.close() @@ -315,7 +315,7 @@ def test_compare_errors_on_duplicate_column_names(): df_b = pandas.DataFrame([[1, 2]], columns=["id", "value"]) con = duckdb.connect() with pytest.raises(ComparisonError, match=r"duplicate column names"): - compare(df_a, df_b, by=["id"], connection=con) + compare(df_a, df_b, by=["id"], con=con) con.close() @@ -339,27 +339,27 @@ def test_compare_errors_when_table_id_invalid_length(): con, rel_a, rel_b = build_connection() with pytest.raises(ComparisonError): bad_table_id = cast(Any, ["x"]) - compare(rel_a, rel_b, by=["id"], table_id=bad_table_id, connection=con) + compare(rel_a, rel_b, by=["id"], table_id=bad_table_id, con=con) def test_compare_errors_when_table_id_duplicates(): con, rel_a, rel_b = build_connection() with pytest.raises(ComparisonError): - compare(rel_a, rel_b, by=["id"], table_id=("dup", "dup"), connection=con) + compare(rel_a, rel_b, by=["id"], table_id=("dup", "dup"), con=con) def test_compare_errors_when_table_id_blank(): con, rel_a, rel_b = build_connection() with pytest.raises(ComparisonError): - compare(rel_a, rel_b, by=["id"], table_id=(" ", "b"), connection=con) + compare(rel_a, rel_b, by=["id"], table_id=(" ", "b"), con=con) def test_compare_errors_when_materialize_invalid(): con, rel_a, rel_b = build_connection() with pytest.raises(ComparisonError): - compare(rel_a, rel_b, by=["id"], connection=con, materialize=cast(Any, "nope")) + compare(rel_a, rel_b, by=["id"], con=con, materialize=cast(Any, "nope")) with pytest.raises(ComparisonError): - compare(rel_a, rel_b, by=["id"], connection=con, materialize=cast(Any, True)) + compare(rel_a, rel_b, by=["id"], con=con, materialize=cast(Any, True)) con.close() @@ -552,7 +552,7 @@ def test_comparison_repr_snapshot(): con.execute( "CREATE OR REPLACE TABLE bar AS SELECT * FROM (VALUES (2, 22, 'y'), (3, 30, 'z')) AS t(id, value, extra)" ) - comp = compare(con.table("foo"), con.table("bar"), by=["id"], connection=con) + comp = compare(con.table("foo"), con.table("bar"), by=["id"], con=con) text = repr(comp) assert "Comparison(tables=" in text assert "by=" in text diff --git a/tests/test_slice_diffs.py b/tests/test_slice_diffs.py index 48a84af..1a3a526 100644 --- a/tests/test_slice_diffs.py +++ b/tests/test_slice_diffs.py @@ -27,7 +27,7 @@ def comparison_for_slice(): "AS t(id, value, other, note)" ), by=["id"], - connection=con, + con=con, ) yield comp comp.close() diff --git a/tests/test_slice_unmatched.py b/tests/test_slice_unmatched.py index 4a28fc7..a7d1c37 100644 --- a/tests/test_slice_unmatched.py +++ b/tests/test_slice_unmatched.py @@ -15,7 +15,7 @@ def comparison_with_unmatched(): con.sql("SELECT * FROM (VALUES (1, 10), (2, 20), (3, 30)) AS t(id, value)"), con.sql("SELECT * FROM (VALUES (2, 20), (3, 30), (4, 40)) AS t(id, value)"), by=["id"], - connection=con, + con=con, ) yield comp comp.close() @@ -45,7 +45,7 @@ def test_slice_unmatched_respects_custom_table_id(): con.sql("SELECT * FROM (VALUES (2, 20), (3, 30), (4, 40)) AS t(id, value)"), by=["id"], table_id=("left", "right"), - connection=con, + con=con, ) left = comp.slice_unmatched("left") assert rel_values(left, "id") == [1] diff --git a/tests/test_value_diffs.py b/tests/test_value_diffs.py index 19a00d8..b7c7678 100644 --- a/tests/test_value_diffs.py +++ b/tests/test_value_diffs.py @@ -41,7 +41,7 @@ def comparison_with_diffs(): ) AS t(id, value, wind, note) """ ) - comp = compare(rel_a, rel_b, by=["id"], connection=con) + comp = compare(rel_a, rel_b, by=["id"], con=con) yield comp comp.close() con.close() @@ -70,7 +70,7 @@ def test_value_diffs_stacked_errors_when_no_value_columns(): con.sql("SELECT * FROM (VALUES (1, 'x')) AS t(id, tag)"), con.sql("SELECT * FROM (VALUES (1, 'x')) AS t(id, tag)"), by=["id", "tag"], - connection=con, + con=con, ) with pytest.raises(ComparisonError): comp.value_diffs_stacked() @@ -100,7 +100,7 @@ def test_value_diffs_stacked_handles_incompatible_types(): "AS t(id, alpha, beta)" ), by=["id"], - connection=con, + con=con, ) out = comp.value_diffs_stacked(["alpha", "beta"]) assert set(rel_values(out, "column")) == {"alpha", "beta"} @@ -115,7 +115,7 @@ def test_value_diffs_respects_custom_table_ids(): con.sql("SELECT * FROM (VALUES (1, 15), (2, 20)) AS t(id, value)"), by=["id"], table_id=("original", "updated"), - connection=con, + con=con, ) out = comp.value_diffs("value") assert {"value_original", "value_updated"}.issubset(set(out.columns)) diff --git a/tests/test_weave_diffs.py b/tests/test_weave_diffs.py index 38b4796..a46b1e4 100644 --- a/tests/test_weave_diffs.py +++ b/tests/test_weave_diffs.py @@ -19,7 +19,7 @@ def comparison_for_weave(): con.sql("SELECT * FROM (VALUES (1, 10, 1), (2, 20, 1)) AS t(id, value, wind)"), con.sql("SELECT * FROM (VALUES (1, 10, 2), (2, 25, 1)) AS t(id, value, wind)"), by=["id"], - connection=con, + con=con, ) yield comp comp.close() @@ -59,7 +59,7 @@ def test_weave_diffs_long_empty_when_no_differences(): con.sql("SELECT * FROM (VALUES (1, 10)) AS t(id, value)"), con.sql("SELECT * FROM (VALUES (1, 10)) AS t(id, value)"), by=["id"], - connection=con, + con=con, ) out = comp.weave_diffs_long(["value"]) assert rel_height(out) == 0 @@ -73,7 +73,7 @@ def test_weave_diffs_long_interleaves_rows(): con.sql("SELECT * FROM (VALUES (1, 10), (2, 20)) AS t(id, value)"), con.sql("SELECT * FROM (VALUES (1, 11), (2, 25)) AS t(id, value)"), by=["id"], - connection=con, + con=con, ) out = comp.weave_diffs_long(["value"]) assert rel_values(out, "table_name") == ["a", "b", "a", "b"] @@ -89,7 +89,7 @@ def test_weave_diffs_respects_custom_table_ids(): con.sql("SELECT * FROM (VALUES (1, 15), (2, 20)) AS t(id, value)"), by=["id"], table_id=("original", "updated"), - connection=con, + con=con, ) wide = comp.weave_diffs_wide(["value"]) assert {"value_original", "value_updated"}.issubset(set(wide.columns))