-
Notifications
You must be signed in to change notification settings - Fork 4.5k
[BEAM-7886] Make row coder a standard coder and implement in Python #9188
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
f80b50d
05ff238
de196e0
83322b9
38459c0
bbf46c2
f7ce06d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,16 +17,22 @@ | |
| */ | ||
| package org.apache.beam.runners.core.construction; | ||
|
|
||
| import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument; | ||
|
|
||
| import java.util.Collections; | ||
| import java.util.List; | ||
| import org.apache.beam.model.pipeline.v1.SchemaApi; | ||
| import org.apache.beam.sdk.coders.Coder; | ||
| import org.apache.beam.sdk.coders.IterableCoder; | ||
| import org.apache.beam.sdk.coders.KvCoder; | ||
| import org.apache.beam.sdk.coders.LengthPrefixCoder; | ||
| import org.apache.beam.sdk.coders.RowCoder; | ||
| import org.apache.beam.sdk.schemas.Schema; | ||
| import org.apache.beam.sdk.transforms.windowing.BoundedWindow; | ||
| import org.apache.beam.sdk.util.InstanceBuilder; | ||
| import org.apache.beam.sdk.util.WindowedValue; | ||
| import org.apache.beam.sdk.util.WindowedValue.FullWindowedValueCoder; | ||
| import org.apache.beam.vendor.grpc.v1p21p0.com.google.protobuf.InvalidProtocolBufferException; | ||
| import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList; | ||
|
|
||
| /** {@link CoderTranslator} implementations for known coder types. */ | ||
|
|
@@ -118,6 +124,33 @@ public FullWindowedValueCoder<?> fromComponents(List<Coder<?>> components) { | |
| }; | ||
| } | ||
|
|
||
| static CoderTranslator<RowCoder> row() { | ||
| return new CoderTranslator<RowCoder>() { | ||
| @Override | ||
| public List<? extends Coder<?>> getComponents(RowCoder from) { | ||
| return ImmutableList.of(); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. So for the time being, we're inlining everything rather than using components. Is there a bug tracking doing better for this?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, right now there's just a fixed mapping from field type to coder. There's no bug filed for using components; I was thinking that we would just continue inlining everything. Do you think we should plan on using components instead? What does that get us?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. For coders, if one had a coder for T, one was likely to also have KV<K, T> for various K, an Iterable<T>, a WindowedValue<T> for possibly several window types, and various other permutations. That, coupled with the fact that leaf coders were often huge serialized blobs, made for some pretty significant savings. Maybe this'll be less of an issue in the streaming world. I think it should not be a blocker, assuming we'll be able to update this in the (short-term) future. |
||
| } | ||
|
|
||
| /** Returns the schema of {@code from}, converted to its proto form and serialized to bytes. */ | ||
| @Override | ||
| public byte[] getPayload(RowCoder from) { | ||
| final Schema rowSchema = from.getSchema(); | ||
| return SchemaTranslation.schemaToProto(rowSchema).toByteArray(); | ||
| } | ||
|
|
||
| /** | ||
| * Rebuilds a {@link RowCoder} from its serialized schema payload. | ||
| * | ||
| * @param components component coders; must be empty | ||
| * @param payload bytes parsed as a {@code SchemaApi.Schema} proto | ||
| * @return a {@link RowCoder} for the schema decoded from {@code payload} | ||
| * @throws IllegalArgumentException if {@code components} is not empty | ||
| * @throws RuntimeException if {@code payload} cannot be parsed as a schema proto | ||
| */ | ||
| @Override | ||
| public RowCoder fromComponents(List<Coder<?>> components, byte[] payload) { | ||
| // Template overload: the message (and components.toString()) is only built when the check fails. | ||
| checkArgument( | ||
| components.isEmpty(), "Expected empty component list, but received: %s", components); | ||
| Schema schema; | ||
| try { | ||
| schema = SchemaTranslation.fromProto(SchemaApi.Schema.parseFrom(payload)); | ||
| } catch (InvalidProtocolBufferException e) { | ||
| // Surface the checked parse failure as unchecked, keeping the original cause. | ||
| throw new RuntimeException("Unable to parse schema for RowCoder: ", e); | ||
| } | ||
| return RowCoder.of(schema); | ||
| } | ||
| }; | ||
| } | ||
|
|
||
| public abstract static class SimpleStructuredCoderTranslator<T extends Coder<?>> | ||
| implements CoderTranslator<T> { | ||
| @Override | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.