From ede872dcd69afc6f05af4ecb08de8190761c127a Mon Sep 17 00:00:00 2001 From: Kamil Gabryjelski Date: Thu, 12 Mar 2026 11:55:51 +0000 Subject: [PATCH 1/2] fix(embedded): prevent global guest RLS from being applied to virtual dataset inner tables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Global guest RLS rules (without a dataset ID) were being applied to each physical table inside a virtual dataset's SQL, in addition to the outer query. When the RLS clause referenced a column that didn't exist on all inner tables (e.g., a JOIN), this caused SQL errors. The fix adds an `include_guest_rls` parameter (default True) threaded through `get_sqla_row_level_filters()` → `get_predicates_for_table()` → `apply_rls()`. In `get_from_clause()`, `include_guest_rls=False` is passed so inner tables skip guest RLS. Guest RLS continues to be applied once at the outer virtual dataset query level. Fixes #37359 Fixes #37551 Co-Authored-By: Claude Opus 4.6 --- superset/connectors/sqla/models.py | 6 ++- superset/models/helpers.py | 2 + superset/utils/rls.py | 14 +++++- .../security/row_level_security_tests.py | 44 ++++++++++++++++ tests/unit_tests/sql_lab_test.py | 50 +++++++++++++++++++ 5 files changed, 114 insertions(+), 2 deletions(-) diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py index 1514a944e98c..6a6260627842 100644 --- a/superset/connectors/sqla/models.py +++ b/superset/connectors/sqla/models.py @@ -740,6 +740,7 @@ def text(self, clause: str) -> TextClause: def get_sqla_row_level_filters( self, template_processor: Optional[BaseTemplateProcessor] = None, + include_guest_rls: bool = True, ) -> list[TextClause]: """ Return the appropriate row level security filters for this table and the @@ -747,6 +748,9 @@ def get_sqla_row_level_filters( Flask global namespace. :param template_processor: The template processor to apply to the filters. + :param include_guest_rls: Whether to include guest token RLS filters. + Set to False when applying RLS to inner tables of virtual datasets, + since guest RLS is applied at the outer query level. :returns: A list of SQL clauses to be ANDed together. """ # noqa: E501 template_processor = template_processor or self.get_template_processor() @@ -763,7 +767,7 @@ def get_sqla_row_level_filters( else: all_filters.append(clause) - if is_feature_enabled("EMBEDDED_SUPERSET"): + if include_guest_rls and is_feature_enabled("EMBEDDED_SUPERSET"): for rule in security_manager.get_guest_rls_filters(self): clause = self.text( f"({template_processor.process_template(rule['clause'])})" diff --git a/superset/models/helpers.py b/superset/models/helpers.py index f6f28f63850d..9c42022af443 100644 --- a/superset/models/helpers.py +++ b/superset/models/helpers.py @@ -903,6 +903,7 @@ def get_fetch_values_predicate( def get_sqla_row_level_filters( self, template_processor: Optional[BaseTemplateProcessor] = None, # pylint: disable=unused-argument + include_guest_rls: bool = True, # pylint: disable=unused-argument ) -> list[TextClause]: # TODO: We should refactor this mixin and remove this method # as it exists in the BaseDatasource and is not applicable @@ -2054,6 +2055,7 @@ def get_from_clause( self.catalog, self.schema or default_schema or "", statement, + include_guest_rls=False, ) # Regenerate the SQL after RLS application from_sql = parsed_script.format() diff --git a/superset/utils/rls.py b/superset/utils/rls.py index 43f7da9ffe81..17188105b250 100644 --- a/superset/utils/rls.py +++ b/superset/utils/rls.py @@ -34,9 +34,14 @@ def apply_rls( catalog: str | None, schema: str, parsed_statement: BaseSQLStatement[Any], + include_guest_rls: bool = True, ) -> None: """ Modify statement inplace to ensure RLS rules are applied. + + :param include_guest_rls: Whether to include guest token RLS filters. + Set to False when applying RLS to inner tables of virtual datasets, + since guest RLS is applied at the outer query level. """ # There are two ways to insert RLS: either replacing the table with a subquery # that has the RLS, or appending the RLS to the ``WHERE`` clause. The former is @@ -53,6 +58,7 @@ def apply_rls( table, database, database.get_default_catalog(), + include_guest_rls=include_guest_rls, ) if predicate ] @@ -64,6 +70,7 @@ def get_predicates_for_table( table: Table, database: Database, default_catalog: str | None, + include_guest_rls: bool = True, ) -> list[str]: """ Get the RLS predicates for a table. @@ -71,6 +78,9 @@ def get_predicates_for_table( This is used to inject RLS rules into SQL statements run in SQL Lab. Note that the table must be fully qualified, with catalog (null if the DB doesn't support) and schema. + + :param include_guest_rls: Whether to include guest token RLS filters. + Set to False when applying RLS to inner tables of virtual datasets. """ from superset.connectors.sqla.models import SqlaTable @@ -105,7 +115,9 @@ def get_predicates_for_table( compile_kwargs={"literal_binds": True}, ) ) - for predicate in dataset.get_sqla_row_level_filters() + for predicate in dataset.get_sqla_row_level_filters( + include_guest_rls=include_guest_rls, + ) ] diff --git a/tests/integration_tests/security/row_level_security_tests.py b/tests/integration_tests/security/row_level_security_tests.py index cab80b49476e..22652023eeb2 100644 --- a/tests/integration_tests/security/row_level_security_tests.py +++ b/tests/integration_tests/security/row_level_security_tests.py @@ -845,3 +845,47 @@ def test_dataset_id_can_be_string(self): sql = dataset.get_query_str(self.query_obj) assert re.search(RLS_ALICE_REGEX, sql) + + @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices") + @pytest.mark.usefixtures("load_energy_table_with_slice") + def test_global_rls_not_applied_to_virtual_dataset_inner_tables(self): + """ + Global guest RLS (no dataset ID) should only be applied to the outer + query of a virtual dataset, not to individual inner tables. This + prevents SQL errors when the RLS column doesn't exist on all inner + tables (e.g., a virtual dataset JOINing tables where only one has the + filtered column). + """ + birth_names = self.get_table(name="birth_names") + + virtual_dataset = SqlaTable( + table_name="virtual_birth_names_rls_test", + database=birth_names.database, + schema=birth_names.schema, + sql=f"SELECT * FROM {birth_names.table_name}", # noqa: S608 + ) + db.session.add(virtual_dataset) + db.session.commit() + + try: + # Global RLS rule (no dataset ID) — should only apply to outer + # query, not to the inner birth_names table reference + g.user = self.guest_user_with_rls(rules=[{"clause": "name = 'Alice'"}]) + query_obj = { + "groupby": [], + "metrics": None, + "filter": [], + "is_timeseries": False, + "columns": ["name"], + "granularity": None, + "from_dttm": None, + "to_dttm": None, + "extras": {}, + } + sql = virtual_dataset.get_query_str(query_obj) + + # Guest RLS should appear in the outer WHERE clause + assert re.search(RLS_ALICE_REGEX, sql) + finally: + db.session.delete(virtual_dataset) + db.session.commit() diff --git a/tests/unit_tests/sql_lab_test.py b/tests/unit_tests/sql_lab_test.py index 9fd3a0ac0e24..10f4a1d7936e 100644 --- a/tests/unit_tests/sql_lab_test.py +++ b/tests/unit_tests/sql_lab_test.py @@ -326,3 +326,53 @@ def test_get_predicates_for_table(mocker: MockerFixture) -> None: table = Table("t1", "public", "examples") assert get_predicates_for_table(table, database, "examples") == ["c1 = 1"] + + +def test_apply_rls_exclude_guest_rls(mocker: MockerFixture) -> None: + """ + Test that ``apply_rls`` passes ``include_guest_rls`` to + ``get_predicates_for_table``. + """ + database = mocker.MagicMock() + database.get_default_schema_for_query.return_value = "public" + database.get_default_catalog.return_value = "examples" + database.db_engine_spec = PostgresEngineSpec + mock_get_predicates = mocker.patch( + "superset.utils.rls.get_predicates_for_table", + return_value=[], + ) + + parsed_statement = SQLStatement("SELECT * FROM t1", "postgresql") + + apply_rls(database, "examples", "public", parsed_statement, include_guest_rls=False) + + mock_get_predicates.assert_called_once_with( + Table("t1", "public", "examples"), + database, + "examples", + include_guest_rls=False, + ) + + +def test_get_predicates_for_table_exclude_guest_rls(mocker: MockerFixture) -> None: + """ + Test that ``get_predicates_for_table`` passes ``include_guest_rls`` to + ``dataset.get_sqla_row_level_filters``. + """ + database = mocker.MagicMock() + dataset = mocker.MagicMock() + predicate = mocker.MagicMock() + predicate.compile.return_value = "c1 = 1" + dataset.get_sqla_row_level_filters.return_value = [predicate] + db = mocker.patch("superset.utils.rls.db") + db.session.query().filter().one_or_none.return_value = dataset + + table = Table("t1", "public", "examples") + result = get_predicates_for_table( + table, database, "examples", include_guest_rls=False + ) + + assert result == ["c1 = 1"] + dataset.get_sqla_row_level_filters.assert_called_once_with( + include_guest_rls=False, + ) From b995e5356c2dad1b7cc75bd0a6e5abb54751ba89 Mon Sep 17 00:00:00 2001 From: Kamil Gabryjelski Date: Thu, 12 Mar 2026 12:03:15 +0000 Subject: [PATCH 2/2] fix(embedded): prevent global guest RLS from being applied to virtual dataset inner tables Global guest RLS rules (without a dataset ID) were being applied to each physical table inside a virtual dataset's SQL, in addition to the outer query. When the RLS clause referenced a column that didn't exist on all inner tables (e.g., a JOIN), this caused SQL errors. The fix adds an `include_global_guest_rls` parameter to `get_sqla_row_level_filters()` that, when False, skips only global (unscoped) guest RLS rules while preserving dataset-scoped guest rules. `get_predicates_for_table()` passes `include_global_guest_rls=False` so that inner physical tables skip global guest RLS. Global guest RLS continues to be applied once at the outer virtual dataset query level. Dataset-scoped guest rules (those with a specific dataset ID) are always applied regardless, ensuring they reach the targeted physical table even inside virtual datasets. Fixes #37359 Fixes #37551 Co-Authored-By: Claude Opus 4.6 --- superset/connectors/sqla/models.py | 18 +++++++------ superset/models/helpers.py | 3 +-- superset/utils/rls.py | 17 +++++------- tests/unit_tests/sql_lab_test.py | 42 +++++++----------------------- 4 files changed, 27 insertions(+), 53 deletions(-) diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py index 6a6260627842..b49e9ba4d337 100644 --- a/superset/connectors/sqla/models.py +++ b/superset/connectors/sqla/models.py @@ -740,19 +740,19 @@ def text(self, clause: str) -> TextClause: def get_sqla_row_level_filters( self, template_processor: Optional[BaseTemplateProcessor] = None, - include_guest_rls: bool = True, + include_global_guest_rls: bool = True, ) -> list[TextClause]: """ Return the appropriate row level security filters for this table and the - current user. A custom username can be passed when the user is not present in the - Flask global namespace. + current user. :param template_processor: The template processor to apply to the filters. - :param include_guest_rls: Whether to include guest token RLS filters. - Set to False when applying RLS to inner tables of virtual datasets, - since guest RLS is applied at the outer query level. + :param include_global_guest_rls: Whether to include global (unscoped) guest + RLS filters. Set to False for underlying tables in virtual datasets to + prevent double application of global guest rules. Dataset-scoped guest + rules are always included regardless of this parameter. :returns: A list of SQL clauses to be ANDed together. - """ # noqa: E501 + """ template_processor = template_processor or self.get_template_processor() all_filters: list[TextClause] = [] @@ -767,8 +767,10 @@ def get_sqla_row_level_filters( else: all_filters.append(clause) - if include_guest_rls and is_feature_enabled("EMBEDDED_SUPERSET"): + if is_feature_enabled("EMBEDDED_SUPERSET"): for rule in security_manager.get_guest_rls_filters(self): + if not include_global_guest_rls and not rule.get("dataset"): + continue clause = self.text( f"({template_processor.process_template(rule['clause'])})" ) diff --git a/superset/models/helpers.py b/superset/models/helpers.py index 9c42022af443..704b0f4c56a3 100644 --- a/superset/models/helpers.py +++ b/superset/models/helpers.py @@ -903,7 +903,7 @@ def get_fetch_values_predicate( def get_sqla_row_level_filters( self, template_processor: Optional[BaseTemplateProcessor] = None, # pylint: disable=unused-argument - include_guest_rls: bool = True, # pylint: disable=unused-argument + include_global_guest_rls: bool = True, # pylint: disable=unused-argument ) -> list[TextClause]: # TODO: We should refactor this mixin and remove this method # as it exists in the BaseDatasource and is not applicable @@ -2055,7 +2055,6 @@ def get_from_clause( self.catalog, self.schema or default_schema or "", statement, - include_guest_rls=False, ) # Regenerate the SQL after RLS application from_sql = parsed_script.format() diff --git a/superset/utils/rls.py b/superset/utils/rls.py index 17188105b250..a2b379caec08 100644 --- a/superset/utils/rls.py +++ b/superset/utils/rls.py @@ -34,14 +34,9 @@ def apply_rls( catalog: str | None, schema: str, parsed_statement: BaseSQLStatement[Any], - include_guest_rls: bool = True, ) -> None: """ Modify statement inplace to ensure RLS rules are applied. - - :param include_guest_rls: Whether to include guest token RLS filters. - Set to False when applying RLS to inner tables of virtual datasets, - since guest RLS is applied at the outer query level. """ # There are two ways to insert RLS: either replacing the table with a subquery # that has the RLS, or appending the RLS to the ``WHERE`` clause. The former is @@ -58,7 +53,6 @@ def apply_rls( table, database, database.get_default_catalog(), - include_guest_rls=include_guest_rls, ) if predicate ] @@ -70,7 +64,6 @@ def get_predicates_for_table( table: Table, database: Database, default_catalog: str | None, - include_guest_rls: bool = True, ) -> list[str]: """ Get the RLS predicates for a table. @@ -78,9 +71,6 @@ def get_predicates_for_table( This is used to inject RLS rules into SQL statements run in SQL Lab. Note that the table must be fully qualified, with catalog (null if the DB doesn't support) and schema. - - :param include_guest_rls: Whether to include guest token RLS filters. - Set to False when applying RLS to inner tables of virtual datasets. """ from superset.connectors.sqla.models import SqlaTable @@ -108,6 +98,11 @@ def get_predicates_for_table( if not dataset: return [] + # Exclude global (unscoped) guest RLS to prevent double application in + # virtual datasets. Global guest rules will be applied to the outer query + # via get_sqla_row_level_filters() on the virtual dataset itself. + # Dataset-scoped guest rules are still included here because they target + # this specific physical dataset and won't match on the outer query. return [ str( predicate.compile( @@ -116,7 +111,7 @@ def get_predicates_for_table( ) ) for predicate in dataset.get_sqla_row_level_filters( - include_guest_rls=include_guest_rls, + include_global_guest_rls=False, ) ] diff --git a/tests/unit_tests/sql_lab_test.py b/tests/unit_tests/sql_lab_test.py index 10f4a1d7936e..cdf0c81bdce8 100644 --- a/tests/unit_tests/sql_lab_test.py +++ b/tests/unit_tests/sql_lab_test.py @@ -326,38 +326,18 @@ def test_get_predicates_for_table(mocker: MockerFixture) -> None: table = Table("t1", "public", "examples") assert get_predicates_for_table(table, database, "examples") == ["c1 = 1"] - - -def test_apply_rls_exclude_guest_rls(mocker: MockerFixture) -> None: - """ - Test that ``apply_rls`` passes ``include_guest_rls`` to - ``get_predicates_for_table``. - """ - database = mocker.MagicMock() - database.get_default_schema_for_query.return_value = "public" - database.get_default_catalog.return_value = "examples" - database.db_engine_spec = PostgresEngineSpec - mock_get_predicates = mocker.patch( - "superset.utils.rls.get_predicates_for_table", - return_value=[], - ) - - parsed_statement = SQLStatement("SELECT * FROM t1", "postgresql") - - apply_rls(database, "examples", "public", parsed_statement, include_guest_rls=False) - - mock_get_predicates.assert_called_once_with( - Table("t1", "public", "examples"), - database, - "examples", - include_guest_rls=False, + dataset.get_sqla_row_level_filters.assert_called_once_with( + include_global_guest_rls=False, ) -def test_get_predicates_for_table_exclude_guest_rls(mocker: MockerFixture) -> None: +def test_get_predicates_for_table_excludes_global_guest_rls( + mocker: MockerFixture, +) -> None: """ - Test that ``get_predicates_for_table`` passes ``include_guest_rls`` to - ``dataset.get_sqla_row_level_filters``. + Test that ``get_predicates_for_table`` passes + ``include_global_guest_rls=False`` to ``dataset.get_sqla_row_level_filters`` + to prevent double application of global guest RLS in virtual datasets. """ database = mocker.MagicMock() dataset = mocker.MagicMock() @@ -368,11 +348,9 @@ def test_get_predicates_for_table_exclude_guest_rls(mocker: MockerFixture) -> No db.session.query().filter().one_or_none.return_value = dataset table = Table("t1", "public", "examples") - result = get_predicates_for_table( - table, database, "examples", include_guest_rls=False - ) + result = get_predicates_for_table(table, database, "examples") assert result == ["c1 = 1"] dataset.get_sqla_row_level_filters.assert_called_once_with( - include_guest_rls=False, + include_global_guest_rls=False, )