Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions changelog/8113-attachment-upload-schema-storage.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
type: Added
description: ENG-3517 foundation - `FileUploadCustomPrivacyRequestField` Privacy Center custom-field variant, magic-byte sniff catalog (`FilesMagicBytes`, `AllowedFileType` helpers, `FileUploadConstraints`), URL constant + config knobs, and `PrivacyRequestService` extension hooks for downstream attachment-promotion. Data-layer half (model + repo) ships in #8110; upload service + endpoint live in fidesplus.
pr: 8113
labels: []
13 changes: 13 additions & 0 deletions src/fides/api/schemas/attachment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
"""Pydantic schemas for data-subject-uploaded attachments."""

from pydantic import ConfigDict, Field

from fides.api.schemas.base_class import FidesSchema


class PrivacyRequestAttachment(FidesSchema):
    """Response body for an attachment upload.

    The caller echoes ``id`` back as the ``value`` of the matching custom
    privacy request field.
    """

    # Unknown keys are rejected rather than silently dropped.
    model_config = ConfigDict(extra="forbid")

    id: str = Field(
        description="AttachmentUserProvided row id.",
    )
49 changes: 49 additions & 0 deletions src/fides/api/schemas/privacy_center_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,52 @@ def validate_location_field(cls, values: Dict[str, Any]) -> Dict[str, Any]:
return values


def _default_file_max_size_bytes() -> int:
    """Return the storage layer's default maximum upload size, in bytes.

    The storage module is imported inside the function so that it is not
    pulled in when this schema module is imported.
    """
    from fides.api.service.storage import util as storage_util

    return storage_util.DEFAULT_FILE_MAX_SIZE_BYTES


def _default_allowed_file_types() -> list[str]:
    """Return the default public-upload file extensions as a sorted list."""
    # Local import, matching the other default factory in this module.
    from fides.api.service.storage import util as storage_util

    default_extensions = (
        storage_util.AllowedFileType.default_public_upload_allowed_file_types()
    )
    return sorted(default_extensions)


class FileUploadCustomPrivacyRequestField(BaseCustomPrivacyRequestField):
    """File upload field. ``max_size_bytes`` and ``allowed_file_types``
    drive client hints and per-field upload enforcement."""

    field_type: Literal["file"] = "file"
    required: Optional[bool] = False
    # Defaults are resolved lazily through factories so the storage module is
    # only imported when a default is actually needed.
    max_size_bytes: int = Field(default_factory=_default_file_max_size_bytes, gt=0)
    allowed_file_types: list[str] = Field(default_factory=_default_allowed_file_types)

    @model_validator(mode="before")
    @classmethod
    def validate_file_field(cls, values: Any) -> Any:
        """Reject ``options`` on a file field.

        A ``mode="before"`` validator receives the raw, unvalidated input,
        which is not guaranteed to be a dict. Only dict input is inspected;
        anything else is returned unchanged so that pydantic's own
        validation reports the problem instead of an ``AttributeError``
        escaping from ``.get``.

        Raises:
            ValueError: if the input dict carries a truthy ``options`` entry.
        """
        if isinstance(values, dict) and values.get("options"):
            raise ValueError("file fields do not support options")
        return values

    @field_validator("allowed_file_types")
    @classmethod
    def validate_allowed_file_types(cls, v: list[str]) -> list[str]:
        """Ensure the list is non-empty and contains only supported extensions.

        Raises:
            ValueError: if ``v`` is empty, or if any entry is not one of
                ``AllowedFileType.supported_file_types()``.
        """
        # Local import: keep storage modules out of schema import time.
        from fides.api.service.storage.util import AllowedFileType

        supported = AllowedFileType.supported_file_types()
        if not v:
            raise ValueError("allowed_file_types must not be empty")
        unsupported = [ext for ext in v if ext not in supported]
        if unsupported:
            raise ValueError(
                f"Unsupported file types: {sorted(unsupported)}. "
                f"Supported: {sorted(supported)}"
            )
        return v


# Create a discriminated union type using the field_type to properly distinguish between types
def get_field_type_discriminator(v: Any) -> str:
"""Discriminator function for CustomPrivacyRequestFieldUnion"""
Expand All @@ -117,12 +163,15 @@ def get_field_type_discriminator(v: Any) -> str:

if field_type == "location":
return "location"
if field_type == "file":
return "file"
return "custom"


CustomPrivacyRequestFieldUnion = Annotated[
Union[
Annotated[LocationCustomPrivacyRequestField, Tag("location")],
Annotated[FileUploadCustomPrivacyRequestField, Tag("file")],
Annotated[CustomPrivacyRequestField, Tag("custom")],
],
Discriminator(get_field_type_discriminator),
Expand Down
96 changes: 96 additions & 0 deletions src/fides/api/service/storage/util.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
from collections import defaultdict
from dataclasses import dataclass
from enum import Enum as EnumType
from typing import Any, Callable, Optional
from urllib.parse import quote
Expand All @@ -8,6 +9,47 @@

from fides.api.util.storage_util import format_size

# Default maximum size, in bytes, for a single uploaded file when no explicit
# limit is supplied.
DEFAULT_FILE_MAX_SIZE_BYTES = 10 * 1024 * 1024 # 10 MB


class FilesMagicBytes:
    """Catalog of leading-byte ("magic") signatures, keyed by file extension."""

    SIGNATURES: dict[str, bytes] = {
        "pdf": b"%PDF",
        "doc": b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1",
        "docx": b"PK\x03\x04",
        "jpg": b"\xff\xd8\xff",
        "jpeg": b"\xff\xd8\xff",
        "png": b"\x89PNG\r\n\x1a\n",
        "xls": b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1",
        "xlsx": b"PK\x03\x04",
        "zip": b"PK\x03\x04",
    }

    @classmethod
    def candidates(cls, data: bytes) -> set[str]:
        """Return every extension whose signature is a prefix of ``data``.

        Several extensions share one signature (``docx``, ``xlsx`` and
        ``zip`` all start with ``PK\\x03\\x04``), so the result is a set.
        Callers narrow it by intersecting with their own allow-list instead
        of depending on dict-iteration order.
        """
        matching: set[str] = set()
        for extension, signature in cls.SIGNATURES.items():
            leading_bytes = data[: len(signature)]
            if leading_bytes == signature:
                matching.add(extension)
        return matching

    @classmethod
    def extensions_without_magic(cls) -> set[str]:
        """Return supported extensions that have no entry in ``SIGNATURES``
        (CSV, TXT).

        Callers fall back to the client-claimed filename for these only.
        Types that do have a signature stay magic-byte-authoritative, so a
        malicious file cannot bypass validation by claiming a misleading
        extension.
        """
        supported = AllowedFileType.supported_file_types()
        return supported - set(cls.SIGNATURES)


# This is the max file size for downloading the content of an attachment.
# This is an industry standard used by companies like Google and Microsoft.
LARGE_FILE_THRESHOLD = 2 * 1024 * 1024 * 1024 # 2 GB
Expand All @@ -30,6 +72,60 @@ class AllowedFileType(EnumType):
csv = "text/csv"
zip = "application/zip"

@classmethod
def default_public_upload_allowed_file_types(cls) -> set[str]:
    """Return the extensions accepted by default on public (unauthenticated)
    upload endpoints. A new set is built on every call."""
    public_defaults = ("pdf", "jpg", "png")
    return set(public_defaults)

@classmethod
def supported_file_types(cls) -> set[str]:
    """Return the set of names in ``cls.__members__`` — the file extensions
    that have a known ``AllowedFileType`` enum entry."""
    member_names = cls.__members__
    return set(member_names)


# Reverse lookup: MIME type (enum value) -> file extension (enum member name).
MIME_TO_EXTENSION: dict[str, str] = {
    file_type.value: file_type.name for file_type in AllowedFileType
}


@dataclass(frozen=True)
class FileUploadConstraints:
    """Immutable, resolved upload limits.

    Instances validate themselves on construction: the size limit must be
    positive and ``allowed_file_types`` must be a non-empty subset of
    :meth:`AllowedFileType.supported_file_types`.
    """

    max_size_bytes: int
    allowed_file_types: frozenset[str]

    def __post_init__(self) -> None:
        """Validate both fields, raising ``ValueError`` on the first problem."""
        size_limit, requested = self.max_size_bytes, self.allowed_file_types
        if size_limit <= 0:
            raise ValueError("max_size_bytes must be greater than 0")
        if not requested:
            raise ValueError("allowed_file_types must not be empty")

        # Looked up only after the cheap checks above have passed.
        known = AllowedFileType.supported_file_types()
        unknown = requested - known
        if not unknown:
            return
        raise ValueError(
            f"Unsupported file types: {sorted(unknown)}. "
            f"Supported: {sorted(known)}"
        )

    @classmethod
    def defaults(cls) -> "FileUploadConstraints":
        """Build constraints from the module default size limit and the
        default public-upload file types."""
        default_types = AllowedFileType.default_public_upload_allowed_file_types()
        return cls(
            max_size_bytes=DEFAULT_FILE_MAX_SIZE_BYTES,
            allowed_file_types=frozenset(default_types),
        )


def extension_for_mime(mime: str) -> str:
    """Look up the file extension (no leading dot) registered for ``mime``.

    Raises:
        ValueError: if ``mime`` has no entry in ``MIME_TO_EXTENSION``; the
            original ``KeyError`` is chained as the cause.
    """
    try:
        extension = MIME_TO_EXTENSION[mime]
    except KeyError as missing:
        raise ValueError(f"No extension registered for MIME {mime!r}") from missing
    return extension


LOCAL_FIDES_UPLOAD_DIRECTORY = "fides_uploads"

Expand Down
1 change: 1 addition & 0 deletions src/fides/common/urn_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@
PRIVACY_REQUEST_BATCH_EMAIL_SEND = (
"/privacy-request/administrate/process-awaiting-email-send"
)
PRIVACY_REQUEST_ATTACHMENT = "/privacy-request/attachment"
PRIVACY_REQUEST_AUTHENTICATED = "/privacy-request/authenticated"
PRIVACY_REQUEST_BULK_FINALIZE = "/privacy-request/bulk/finalize"
PRIVACY_REQUEST_BULK_RETRY = "/privacy-request/bulk/retry"
Expand Down
4 changes: 4 additions & 0 deletions src/fides/config/execution_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ class ExecutionSettings(FidesSettings):
default=False,
description="Allows custom privacy request fields to be used in request execution.",
)
allow_custom_privacy_request_file_upload: bool = Field(
default=False,
description="Allows file uploads to be attached to incoming privacy requests.",
)
request_task_ttl: int = Field(
default=604800,
description="The number of seconds a request task should live.",
Expand Down
10 changes: 9 additions & 1 deletion src/fides/config/security_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,10 @@ class SecuritySettings(FidesSettings):
default="10/minute",
description="The number of authentication requests from a single IP address allowed to hit authentication endpoints (login, OAuth token) within the specified time period.",
)
privacy_request_attachment_rate_limit: str = Field(
default="30/minute",
description="Per-IP rate limit for the unauthenticated privacy-request attachment upload endpoint. Tighter than the global limit because the endpoint accepts large multipart bodies.",
)
root_user_scopes: List[str] = Field(
default=SCOPE_REGISTRY,
description="The list of scopes that are given to the root user.",
Expand Down Expand Up @@ -302,7 +306,11 @@ def validate_rate_limit_client_ip_header(
)
return v

@field_validator("request_rate_limit", "auth_rate_limit")
@field_validator(
"request_rate_limit",
"auth_rate_limit",
"privacy_request_attachment_rate_limit",
)
@classmethod
def validate_rate_limits(
cls,
Expand Down
82 changes: 70 additions & 12 deletions src/fides/service/privacy_request/privacy_request_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,18 +196,19 @@ def _fetch_privacy_requests_for_bulk_operation(
return {pr.id: pr for pr in privacy_requests}

def _validate_required_location_fields(
self, privacy_request_data: PrivacyRequestCreate
self,
privacy_request_data: PrivacyRequestCreate,
action: Optional[PrivacyRequestOption],
) -> None:
"""Validate that location is provided for required location fields.

Looks up the actual Privacy Center configuration to check if any location
fields are marked as required for the specified policy.
Caller resolves ``action`` once via :meth:`_resolve_action_for_request`
and passes it in; ``None`` short-circuits.
"""
# If location is already provided, no validation needed
if privacy_request_data.location:
return

action = self._resolve_action_for_request(privacy_request_data)
if not action or not action.custom_privacy_request_fields:
return

Expand Down Expand Up @@ -317,11 +318,15 @@ def _resolve_action_for_request(
return action

def _validate_field_visibility(
self, privacy_request_data: PrivacyRequestCreate
self,
privacy_request_data: PrivacyRequestCreate,
action: Optional[PrivacyRequestOption],
) -> None:
"""Reject payloads that violate the action's display_condition
contract; translate :class:`DisplayConditionViolation` to ``PrivacyRequestError``."""
action = self._resolve_action_for_request(privacy_request_data)
contract; translate :class:`DisplayConditionViolation` to
``PrivacyRequestError``. Caller resolves ``action`` once via
:meth:`_resolve_action_for_request` and passes it in; ``None``
short-circuits."""
if not action or not action.custom_privacy_request_fields:
return

Expand Down Expand Up @@ -372,6 +377,26 @@ def _is_required_location_missing(

return False

def _resolve_attachment_state(
    self,
    privacy_request_data: PrivacyRequestCreate,
    action: Optional[PrivacyRequestOption],
) -> tuple[PrivacyRequestCreate, Any]:
    """Extension hook; the base implementation does nothing.

    fidesplus overrides this to resolve file-field attachments and strip
    them from the payload that gets persisted. The return value is
    ``(possibly-stripped data, opaque state)``, and the state is later
    handed to :meth:`_promote_attachment_state`.
    """
    # Nothing to resolve here: pass the payload through with no state.
    attachment_state = None
    return (privacy_request_data, attachment_state)

def _promote_attachment_state(
    self,
    privacy_request: PrivacyRequest,
    attachment_state: Any,
) -> None:
    """Extension hook; the base implementation does nothing.

    fidesplus overrides this to promote resolved attachments once the
    request row exists. An exception raised here triggers the rollback
    path in :meth:`create_privacy_request`.
    """
    return None

# pylint: disable=too-many-branches, too-many-statements
def create_privacy_request(
self,
Expand Down Expand Up @@ -412,11 +437,15 @@ def create_privacy_request(
privacy_request_data.model_dump(mode="json"),
)

# Validate location is provided for required location fields
self._validate_required_location_fields(privacy_request_data)

# Validate display_condition visibility: no gated-off fields submitted,
self._validate_field_visibility(privacy_request_data)
# Resolve the action once and forward to validators + hooks below.
# Visibility runs before the attachment hook so required FileUpload
# fields are seen as having a submitted value.
action = self._resolve_action_for_request(privacy_request_data)
self._validate_required_location_fields(privacy_request_data, action)
self._validate_field_visibility(privacy_request_data, action)
privacy_request_data, attachment_state = self._resolve_attachment_state(
privacy_request_data, action
)

policy = Policy.get_by(
db=self.db,
Expand Down Expand Up @@ -505,6 +534,31 @@ def create_privacy_request(
)
privacy_request.persist_masking_secrets(masking_secrets)

# Promote resolved attachments. ``delete()`` commits, so the
# invariant assumed here is that the request row is the only
# DB-side write pending on ``self.db`` (cache + masking
# secrets target Redis, not the DB).
try:
self._promote_attachment_state(privacy_request, attachment_state)
except Exception as promotion_exc:
logger.exception(
"Attachment promotion failed for privacy request {}; "
"deleting to preserve 'files required' invariant",
privacy_request.id,
)
try:
privacy_request.delete(self.db)
except Exception:
logger.exception(
"Failed to delete privacy request {} after promotion failure",
privacy_request.id,
)
# Generic user-facing message — promotion_exc may include
# storage paths/object keys; chained via __cause__ for ops.
raise PrivacyRequestError(
"Attachment processing failed.", kwargs
) from promotion_exc

check_and_dispatch_error_notifications(db=self.db)

_handle_notifications_and_processing(
Expand All @@ -530,6 +584,10 @@ def create_privacy_request(
raise PrivacyRequestError(
"Verification message could not be sent.", kwargs
) from exc
except PrivacyRequestError:
# Already carries a specific reason (e.g. attachment promotion
# failure) - don't rewrap with the generic message below.
raise
except Exception as exc:
logger.error(f"{exc.__class__.__name__}: {str(exc)}")
raise PrivacyRequestError("This record could not be added", kwargs) from exc
Expand Down
Loading
Loading