Skip to content
This repository was archived by the owner on Nov 30, 2022. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ The types of changes are:
* Distinguish whether webhook has been visited and no fields were found, versus never visited [#1339](https://github.com/ethyca/fidesops/pull/1339)
* Fix Redis Cache Early Expiration in Tests [#1358](https://github.com/ethyca/fidesops/pull/1358)
* Limit values for the offset pagination strategy are now cast to integers before use [#1364](https://github.com/ethyca/fidesops/pull/1364)
* Allow `requires_input` PrivacyRequests to be addressed if a webhook is deleted, disabled, or updated [#1394](https://github.com/ethyca/fidesops/pull/1394)

### Added

Expand Down
39 changes: 39 additions & 0 deletions src/fidesops/ops/api/v1/endpoints/connection_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@
ConnectionException,
)
from fidesops.ops.models.connectionconfig import ConnectionConfig, ConnectionType
from fidesops.ops.models.manual_webhook import AccessManualWebhook
from fidesops.ops.models.privacy_request import PrivacyRequest, PrivacyRequestStatus
from fidesops.ops.schemas.api import BulkUpdateFailed
from fidesops.ops.schemas.connection_configuration import (
connection_secrets_schemas,
Expand All @@ -58,6 +60,9 @@
)
from fidesops.ops.schemas.shared_schemas import FidesOpsKey
from fidesops.ops.service.connectors import get_connector
from fidesops.ops.service.privacy_request.request_runner_service import (
queue_privacy_request,
)
from fidesops.ops.util.api_router import APIRouter
from fidesops.ops.util.logger import Pii
from fidesops.ops.util.oauth_util import verify_oauth_client
Expand Down Expand Up @@ -222,6 +227,9 @@ def patch_connections(
)
)

# Check if possibly disabling a manual webhook here causes us to need to queue affected privacy requests
requeue_requires_input_requests(db)

return BulkPutConnectionConfiguration(
succeeded=created_or_updated,
failed=failed,
Expand All @@ -238,9 +246,15 @@ def delete_connection(
) -> None:
"""Removes the connection configuration with matching key."""
connection_config = get_connection_config_or_error(db, connection_key)
connection_type = connection_config.connection_type
logger.info("Deleting connection config with key '%s'.", connection_key)
connection_config.delete(db)

# Access Manual Webhooks are cascade deleted if their ConnectionConfig is deleted,
# so we queue any privacy requests that are no longer blocked by webhooks
if connection_type == ConnectionType.manual_webhook:
requeue_requires_input_requests(db)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As a first step, I am just re-queuing applicable privacy requests if a connection is disabled or deleted. I omitted adding a recurring task to also check if any of these fell through the cracks in the interest of time. I think this kind of thing could be added as a piece of a broader work where we look for privacy requests of multiple statuses that are stuck.



def validate_secrets(
request_body: connection_secrets_schemas, connection_config: ConnectionConfig
Expand Down Expand Up @@ -356,3 +370,28 @@ async def test_connection_config_secrets(
connection_config = get_connection_config_or_error(db, connection_key)
msg = f"Test completed for ConnectionConfig with key: {connection_key}."
return connection_status(connection_config, msg, db)


def requeue_requires_input_requests(db: Session) -> None:
"""
Queue privacy requests with request status "requires_input" if they are no longer blocked by
access manual webhooks.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It'd be better to say explicitly this is if all access manual webhooks are disabled or deleted.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks Sean, I've tried to improve this docstring


For use when all access manual webhooks have been either disabled or deleted, leaving privacy requests
lingering in a "requires_input" state.
"""
if not AccessManualWebhook.get_enabled(db):
for pr in PrivacyRequest.filter(
db=db,
conditions=(PrivacyRequest.status == PrivacyRequestStatus.requires_input),
):
logger.info(
"Queuing privacy request '%s with '%s' status now that manual inputs are no longer required.",
pr.id,
pr.status.value,
)
pr.status = PrivacyRequestStatus.in_processing
pr.save(db=db)
queue_privacy_request(
privacy_request_id=pr.id,
)
35 changes: 23 additions & 12 deletions src/fidesops/ops/api/v1/endpoints/privacy_request_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
FunctionalityNotConfigured,
IdentityNotFoundException,
IdentityVerificationException,
ManualWebhookFieldsUnset,
NoCachedManualWebhookEntry,
PolicyNotFoundException,
TraversalError,
Expand Down Expand Up @@ -1357,6 +1358,11 @@ def view_uploaded_manual_webhook_data(
) -> Optional[ManualWebhookData]:
"""
View uploaded data for this privacy request for the given access manual webhook

If no data exists for this webhook, we just return all fields as None.
If we have missing or extra fields saved, we'll just return the overlap between what is saved and what is defined on the webhook.

If checked=False, data must be reviewed before submission. The privacy request should not be submitted as-is.
"""
privacy_request: PrivacyRequest = get_privacy_request_or_error(
db, privacy_request_id
Expand All @@ -1368,7 +1374,8 @@ def view_uploaded_manual_webhook_data(
if not privacy_request.status == PrivacyRequestStatus.requires_input:
raise HTTPException(
status_code=HTTP_400_BAD_REQUEST,
detail=f"Invalid access manual webhook upload request: privacy request '{privacy_request.id}' status = {privacy_request.status.value}.", # type: ignore
detail=f"Invalid access manual webhook upload request: privacy request "
f"'{privacy_request.id}' status = {privacy_request.status.value}.", # type: ignore
)

try:
Expand All @@ -1377,20 +1384,20 @@ def view_uploaded_manual_webhook_data(
connection_config.key,
privacy_request.id,
)
data: Dict[str, Any] = privacy_request.get_manual_webhook_input(
data: Dict[str, Any] = privacy_request.get_manual_webhook_input_strict(
access_manual_webhook
)
checked = True
except NoCachedManualWebhookEntry as exc:
except (
PydanticValidationError,
ManualWebhookFieldsUnset,
NoCachedManualWebhookEntry,
) as exc:
logger.info(exc)
data = access_manual_webhook.empty_fields_dict
checked = False
except PydanticValidationError:
raise HTTPException(
status_code=HTTP_422_UNPROCESSABLE_ENTITY,
detail=f"Saved fields differ from fields specified on webhook '{access_manual_webhook.connection_config.key}'. "
f"Re-upload manual data using '{PRIVACY_REQUEST_ACCESS_MANUAL_WEBHOOK_INPUT.format(connection_key=connection_config.key, privacy_request_id=privacy_request.id)}'.",
data = privacy_request.get_manual_webhook_input_non_strict(
manual_webhook=access_manual_webhook
Comment on lines +1397 to +1398
Copy link
Contributor Author

@pattisdr pattisdr Sep 27, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the endpoint that returns data that was already uploaded for a webhook if applicable.

I load the webhook data in non-strict mode here if there's an issue, which really just returns the overlap between the saved fields and the new webhook field definitions.

Note that I don't update the data that is actually cached, I just return this "safe mode version". This keeps this endpoint idempotent in case it is hit multiple times or by different people. Regardless, until the webhook fields are updated by a user to match the latest webhook definition, the privacy request itself will not be able to be submitted, because it will also try to access the webhook data in "strict mode" and fail.

)
checked = False
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously, "checked=False" was only returned if there was no data in the cache, so Chris's UI would show that that webhook still needed to be reviewed and the submit DSR button would be grayed out.

In addition to this use case, we now show checked=False if the webhook definition has changed, which prompts the user to have to re-review.

The variable name checked might need to be updated in the future, but I left it like this for now, because it's the field Chris is using in the FE.


return ManualWebhookData(checked=checked, fields=data)

Expand Down Expand Up @@ -1424,8 +1431,12 @@ async def resume_privacy_request_from_requires_input(
)
try:
for manual_webhook in access_manual_webhooks:
privacy_request.get_manual_webhook_input(manual_webhook)
except (NoCachedManualWebhookEntry, PydanticValidationError) as exc:
privacy_request.get_manual_webhook_input_strict(manual_webhook)
except (
NoCachedManualWebhookEntry,
PydanticValidationError,
ManualWebhookFieldsUnset,
) as exc:
raise HTTPException(
status_code=HTTP_400_BAD_REQUEST,
detail=f"Cannot resume privacy request. {exc}",
Expand Down
4 changes: 4 additions & 0 deletions src/fidesops/ops/common_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,10 @@ class NoCachedManualWebhookEntry(BaseException):
"""No manual data exists for this webhook on the given privacy request."""


class ManualWebhookFieldsUnset(BaseException):
"""Manual webhook has fields that are not explicitly set: Likely new field has been added"""


class PrivacyRequestErasureEmailSendRequired(BaseException):
"""Erasure requests will need to be fulfilled by email send. Exception is raised to change ExecutionLog details"""

Expand Down
10 changes: 9 additions & 1 deletion src/fidesops/ops/models/manual_webhook.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from fideslib.db.base_class import Base
from fideslib.schemas.base_class import BaseSchema
from pydantic import create_model
from pydantic import BaseConfig, create_model
from sqlalchemy import Column, ForeignKey, String, text
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.ext.mutable import MutableList
Expand Down Expand Up @@ -50,6 +50,14 @@ class Config:
)
return ManualWebhookValidationModel

@property
def fields_non_strict_schema(self) -> BaseSchema:
"""Returns a dynamic Pydantic Schema for webhook fields that can keep the overlap between
fields that are saved and fields that are defined here."""
schema: BaseSchema = self.fields_schema
schema.__config__ = BaseConfig # Extra is "ignore" on BaseConfig
return schema

@property
def empty_fields_dict(self) -> Dict[str, None]:
"""Return a dictionary that maps defined dsr_package_labels to None
Expand Down
62 changes: 51 additions & 11 deletions src/fidesops/ops/models/privacy_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from fidesops.ops.api.v1.scope_registry import PRIVACY_REQUEST_CALLBACK_RESUME
from fidesops.ops.common_exceptions import (
IdentityVerificationException,
ManualWebhookFieldsUnset,
NoCachedManualWebhookEntry,
PrivacyRequestPaused,
)
Expand Down Expand Up @@ -505,27 +506,50 @@ def cache_manual_webhook_input(
parsed_data.dict(),
)

def get_manual_webhook_input(
def get_manual_webhook_input_strict(
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated this existing method to "strict mode" which used to just look for extra fields and/or no data cached, but now it also looks for missing fields.

self, manual_webhook: AccessManualWebhook
) -> Dict[str, Any]:
"""Retrieve manually added data that matches fields supplied in the specified manual webhook.
"""
Retrieves manual webhook fields saved to the privacy request in strict mode.
Fails either if extra saved fields are detected (webhook definition had fields removed) or fields were not
explicitly set (webhook definition had fields added). This mode lets us know if webhooks data needs to be re-uploaded.

This is for use by the *manual_webhook* connector which is *NOT* integrated with the garph.
This is for use by the *manual_webhook* connector which is *NOT* integrated with the graph.
"""
cache: FidesopsRedis = get_cache()
cached_results: Optional[
Optional[Dict[str, Any]]
] = cache.get_encoded_objects_by_prefix(
f"WEBHOOK_MANUAL_INPUT__{self.id}__{manual_webhook.id}"
cached_results: Optional[Dict[str, Any]] = _get_manual_input_from_cache(
privacy_request=self, manual_webhook=manual_webhook
)

if cached_results:
return manual_webhook.fields_schema.parse_obj(
list(cached_results.values())[0]
).dict()
data: Dict[str, Any] = manual_webhook.fields_schema.parse_obj(
cached_results
).dict(exclude_unset=True)
if set(data.keys()) != set(manual_webhook.fields_schema.__fields__.keys()):
raise ManualWebhookFieldsUnset(
f"Fields unset for privacy_request_id '{self.id}' for connection config '{manual_webhook.connection_config.key}'"
)
return data
raise NoCachedManualWebhookEntry(
f"No data cached for privacy_request_id '{self.id}' for connection config '{manual_webhook.connection_config.key}'"
)

def get_manual_webhook_input_non_strict(
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a "non strict" mode to load webhook data where we really just preserve the overlap.

self, manual_webhook: AccessManualWebhook
) -> Dict[str, Any]:
"""Retrieves manual webhook fields saved to the privacy request in non-strict mode.
Returns None for any fields not explicitly set and ignores extra fields.

This is for use by the *manual_webhook* connector which is *NOT* integrated with the graph.
"""
cached_results: Optional[Dict[str, Any]] = _get_manual_input_from_cache(
privacy_request=self, manual_webhook=manual_webhook
)
if cached_results:
return manual_webhook.fields_non_strict_schema.parse_obj(
cached_results
).dict()
return manual_webhook.empty_fields_dict

def cache_manual_input(
self, collection: CollectionAddress, manual_rows: Optional[List[Row]]
) -> None:
Expand Down Expand Up @@ -713,6 +737,22 @@ def error_processing(self, db: Session) -> None:
)


def _get_manual_input_from_cache(
privacy_request: PrivacyRequest, manual_webhook: AccessManualWebhook
) -> Optional[Dict[str, Any]]:
"""Get raw manual input uploaded to the privacy request for the given webhook
from the cache without attempting to coerce into a Pydantic schema"""
cache: FidesopsRedis = get_cache()
cached_results: Optional[
Optional[Dict[str, Any]]
] = cache.get_encoded_objects_by_prefix(
f"WEBHOOK_MANUAL_INPUT__{privacy_request.id}__{manual_webhook.id}"
)
if cached_results:
return list(cached_results.values())[0]
return None


class ProvidedIdentityType(EnumType):
"""Enum for privacy request identity types"""

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
ClientUnsuccessfulException,
EmailDispatchException,
IdentityNotFoundException,
ManualWebhookFieldsUnset,
NoCachedManualWebhookEntry,
PrivacyRequestPaused,
)
Expand Down Expand Up @@ -94,9 +95,13 @@ def get_access_manual_webhook_inputs(
try:
for manual_webhook in AccessManualWebhook.get_enabled(db):
manual_inputs[manual_webhook.connection_config.key] = [
privacy_request.get_manual_webhook_input(manual_webhook)
privacy_request.get_manual_webhook_input_strict(manual_webhook)
]
except (NoCachedManualWebhookEntry, ValidationError) as exc:
except (
NoCachedManualWebhookEntry,
ValidationError,
ManualWebhookFieldsUnset,
) as exc:
logger.info(exc)
privacy_request.status = PrivacyRequestStatus.requires_input
privacy_request.save(db)
Expand Down
Loading