Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion sdks/python/apache_beam/yaml/yaml_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -563,6 +563,7 @@ def write_to_iceberg(
drop: Optional[Iterable[str]] = None,
only: Optional[str] = None,
distribution_mode: Optional[str] = None,
autosharding: Optional[bool] = None,
Comment thread
ahmedabu98 marked this conversation as resolved.
):
# TODO(robertwb): It'd be nice to derive this list of parameters, along with
# their types and docs, programmatically from the iceberg (or managed)
Expand Down Expand Up @@ -616,6 +617,11 @@ def write_to_iceberg(
distributions:
- none: don't shuffle rows (default)
- hash: shuffle rows by partition key before writing data
autosharding: Enables dynamic sharding to automatically adjust the number
of parallel writers based on data volume. It handles data skew by
further sub-dividing partitions into multiple shards to prevent
bottlenecks during high-throughput writes. Only available with 'hash'
distribution mode.
"""
return beam.managed.Write(
"iceberg",
Expand All @@ -630,7 +636,8 @@ def write_to_iceberg(
keep=keep,
drop=drop,
only=only,
distribution_mode=distribution_mode))
distribution_mode=distribution_mode,
autosharding=autosharding))


def io_providers():
Expand Down
Loading