Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions superset/connectors/sqla/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -740,15 +740,19 @@ def text(self, clause: str) -> TextClause:
def get_sqla_row_level_filters(
self,
template_processor: Optional[BaseTemplateProcessor] = None,
include_global_guest_rls: bool = True,
) -> list[TextClause]:
"""
Return the appropriate row level security filters for this table and the
current user. A custom username can be passed when the user is not present in the
Flask global namespace.
current user.

:param template_processor: The template processor to apply to the filters.
:param include_global_guest_rls: Whether to include global (unscoped) guest
RLS filters. Set to False for underlying tables in virtual datasets to
prevent double application of global guest rules. Dataset-scoped guest
rules are always included regardless of this parameter.
:returns: A list of SQL clauses to be ANDed together.
""" # noqa: E501
"""
template_processor = template_processor or self.get_template_processor()

all_filters: list[TextClause] = []
Expand All @@ -765,6 +769,8 @@ def get_sqla_row_level_filters(

if is_feature_enabled("EMBEDDED_SUPERSET"):
for rule in security_manager.get_guest_rls_filters(self):
if not include_global_guest_rls and not rule.get("dataset"):
continue
clause = self.text(
f"({template_processor.process_template(rule['clause'])})"
)
Expand Down
1 change: 1 addition & 0 deletions superset/models/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -903,6 +903,7 @@ def get_fetch_values_predicate(
def get_sqla_row_level_filters(
self,
template_processor: Optional[BaseTemplateProcessor] = None, # pylint: disable=unused-argument
include_global_guest_rls: bool = True, # pylint: disable=unused-argument
) -> list[TextClause]:
# TODO: We should refactor this mixin and remove this method
# as it exists in the BaseDatasource and is not applicable
Expand Down
9 changes: 8 additions & 1 deletion superset/utils/rls.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,14 +98,21 @@ def get_predicates_for_table(
if not dataset:
return []

# Exclude global (unscoped) guest RLS to prevent double application in
# virtual datasets. Global guest rules will be applied to the outer query
# via get_sqla_row_level_filters() on the virtual dataset itself.
# Dataset-scoped guest rules are still included here because they target
# this specific physical dataset and won't match on the outer query.
return [
str(
predicate.compile(
dialect=database.get_dialect(),
compile_kwargs={"literal_binds": True},
)
)
for predicate in dataset.get_sqla_row_level_filters()
for predicate in dataset.get_sqla_row_level_filters(
include_global_guest_rls=False,
)
]


Expand Down
44 changes: 44 additions & 0 deletions tests/integration_tests/security/row_level_security_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -845,3 +845,47 @@ def test_dataset_id_can_be_string(self):
sql = dataset.get_query_str(self.query_obj)

assert re.search(RLS_ALICE_REGEX, sql)

@pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
@pytest.mark.usefixtures("load_energy_table_with_slice")
def test_global_rls_not_applied_to_virtual_dataset_inner_tables(self):
"""
Global guest RLS (no dataset ID) should only be applied to the outer
query of a virtual dataset, not to individual inner tables. This
prevents SQL errors when the RLS column doesn't exist on all inner
tables (e.g., a virtual dataset JOINing tables where only one has the
filtered column).
"""
birth_names = self.get_table(name="birth_names")

virtual_dataset = SqlaTable(
table_name="virtual_birth_names_rls_test",
database=birth_names.database,
schema=birth_names.schema,
sql=f"SELECT * FROM {birth_names.table_name}", # noqa: S608
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggestion: The new regression test does not actually create a virtual dataset with multiple inner tables, so it cannot catch the original bug scenario where a global guest filter is incorrectly pushed down to joined inner tables that lack the filtered column. Build the virtual dataset with a JOIN so the test exercises the intended failure mode. [logic error]

Severity Level: Major ⚠️
- ⚠️ Embedded guest JOIN regressions escape integration test coverage.
- ⚠️ CI may pass despite broken virtual-dataset RLS behavior.
Suggested change
sql=f"SELECT * FROM {birth_names.table_name}", # noqa: S608
sql=f"SELECT b.name, e.value FROM {birth_names.table_name} b JOIN energy_usage e ON 1=1", # noqa: S608
Steps of Reproduction ✅
1. Run
`GuestTokenRowLevelSecurityTests.test_global_rls_not_applied_to_virtual_dataset_inner_tables`
in `tests/integration_tests/security/row_level_security_tests.py:851-891`; it builds
virtual SQL at line 865 as `SELECT * FROM birth_names` (single inner table).

2. The test calls `virtual_dataset.get_query_str(query_obj)` at
`row_level_security_tests.py:885`, which flows into
`superset/models/helpers.py:get_query_str` (line 2186) and then virtual SQL parsing in
`get_from_clause` (line 2029).

3. Inner-table RLS application loops `parsed_statement.tables` in
`superset/utils/rls.py:53-63`; with current test SQL there is only one table
(`birth_names`), so no multi-table pushdown case is exercised.

4. The documented failure mode needs joined inner tables with different schemas;
`energy_usage` fixture columns are `source/target/value`
(`tests/integration_tests/fixtures/energy_dashboard.py:49`), so adding JOIN coverage is
required to validate "column missing on some inner tables" regressions.
Prompt for AI Agent 🤖
This is a comment left during a code review.

**Path:** tests/integration_tests/security/row_level_security_tests.py
**Line:** 865:865
**Comment:**
	*Logic Error: The new regression test does not actually create a virtual dataset with multiple inner tables, so it cannot catch the original bug scenario where a global guest filter is incorrectly pushed down to joined inner tables that lack the filtered column. Build the virtual dataset with a JOIN so the test exercises the intended failure mode.

Validate the correctness of the flagged issue. If correct, How can I resolve this? If you propose a fix, implement it and please make it concise.
👍 | 👎

)
db.session.add(virtual_dataset)
db.session.commit()

try:
# Global RLS rule (no dataset ID) — should only apply to outer
# query, not to the inner birth_names table reference
g.user = self.guest_user_with_rls(rules=[{"clause": "name = 'Alice'"}])
query_obj = {
"groupby": [],
"metrics": None,
"filter": [],
"is_timeseries": False,
"columns": ["name"],
"granularity": None,
"from_dttm": None,
"to_dttm": None,
"extras": {},
}
sql = virtual_dataset.get_query_str(query_obj)

# Guest RLS should appear in the outer WHERE clause
assert re.search(RLS_ALICE_REGEX, sql)
Comment on lines +887 to +888
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggestion: The assertion only checks that the guest RLS clause exists somewhere in the SQL, which still passes if the clause is applied both to inner tables and the outer query. Assert that the clause appears exactly once to verify the fix truly prevents duplicate inner-table application. [logic error]

Severity Level: Major ⚠️
- ⚠️ Duplicate guest RLS injection not detected by assertion.
- ⚠️ Regression can ship despite failing outer-only guarantee.
Suggested change
# Guest RLS should appear in the outer WHERE clause
assert re.search(RLS_ALICE_REGEX, sql)
# Guest RLS should appear exactly once (outer query only)
assert len(re.findall(RLS_ALICE_REGEX, sql)) == 1
Steps of Reproduction ✅
1. In `tests/integration_tests/security/row_level_security_tests.py:887-888`, the check
uses `re.search`, which validates only "at least one match".

2. Query generation path for this test is `virtual_dataset.get_query_str()`
(`row_level_security_tests.py:885`) → `superset/models/helpers.py:get_sqla_query` where
outer RLS is appended at line 3215.

3. Guest RLS clauses are produced in `superset/connectors/sqla/models.py:770-772`; if
guest RLS is mistakenly also included for inner virtual-dataset tables, SQL can contain
the same clause multiple times (inner + outer).

4. With duplicated predicates, `re.search(RLS_ALICE_REGEX, sql)` still passes;
`len(re.findall(...)) == 1` is needed to assert the fix intent ("outer query only") and
catch duplicate injection regressions.
Prompt for AI Agent 🤖
This is a comment left during a code review.

**Path:** tests/integration_tests/security/row_level_security_tests.py
**Line:** 887:888
**Comment:**
	*Logic Error: The assertion only checks that the guest RLS clause exists somewhere in the SQL, which still passes if the clause is applied both to inner tables and the outer query. Assert that the clause appears exactly once to verify the fix truly prevents duplicate inner-table application.

Validate the correctness of the flagged issue. If correct, How can I resolve this? If you propose a fix, implement it and please make it concise.
👍 | 👎

finally:
db.session.delete(virtual_dataset)
db.session.commit()
28 changes: 28 additions & 0 deletions tests/unit_tests/sql_lab_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,3 +326,31 @@ def test_get_predicates_for_table(mocker: MockerFixture) -> None:

table = Table("t1", "public", "examples")
assert get_predicates_for_table(table, database, "examples") == ["c1 = 1"]
dataset.get_sqla_row_level_filters.assert_called_once_with(
include_global_guest_rls=False,
)


def test_get_predicates_for_table_excludes_global_guest_rls(
mocker: MockerFixture,
) -> None:
"""
Test that ``get_predicates_for_table`` passes
``include_global_guest_rls=False`` to ``dataset.get_sqla_row_level_filters``
to prevent double application of global guest RLS in virtual datasets.
"""
database = mocker.MagicMock()
dataset = mocker.MagicMock()
predicate = mocker.MagicMock()
predicate.compile.return_value = "c1 = 1"
dataset.get_sqla_row_level_filters.return_value = [predicate]
db = mocker.patch("superset.utils.rls.db")
db.session.query().filter().one_or_none.return_value = dataset

table = Table("t1", "public", "examples")
result = get_predicates_for_table(table, database, "examples")

assert result == ["c1 = 1"]
dataset.get_sqla_row_level_filters.assert_called_once_with(
include_global_guest_rls=False,
)
Loading