-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Simplify the serialization of ScalarValue::List
#3547
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -714,6 +714,9 @@ message Union{ | |
| } | ||
|
|
||
| message ScalarListValue{ | ||
| // encode null explicitly to distinguish a list with a null value | ||
| // from a list with no values | ||
| bool is_null = 3; | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I hope to remove this when I remove |
||
| Field field = 1; | ||
| repeated ScalarValue values = 2; | ||
| } | ||
|
|
@@ -768,7 +771,7 @@ message ScalarValue{ | |
| //Literal Date32 value always has a unit of day | ||
| int32 date_32_value = 14; | ||
| ScalarListValue list_value = 17; | ||
| ScalarType null_list_value = 18; | ||
| //WAS: ScalarType null_list_value = 18; | ||
|
|
||
| Decimal128 decimal128_value = 20; | ||
| int64 date_64_value = 21; | ||
|
|
@@ -825,17 +828,6 @@ enum PrimitiveScalarType{ | |
| TIME64 = 27; | ||
| } | ||
|
|
||
| message ScalarType{ | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I am not sure why, but the previous code was encoding |
||
| oneof datatype{ | ||
| PrimitiveScalarType scalar = 1; | ||
| ScalarListType list = 2; | ||
| } | ||
| } | ||
|
|
||
| message ScalarListType{ | ||
| repeated string field_names = 3; | ||
| PrimitiveScalarType deepest_type = 2; | ||
| } | ||
|
|
||
| // Broke out into multiple message types so that type | ||
| // metadata did not need to be in separate message | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -95,10 +95,6 @@ impl Error { | |
| Error::MissingRequiredField(field.into()) | ||
| } | ||
|
|
||
| fn at_least_one(field: impl Into<String>) -> Error { | ||
| Error::AtLeastOneValue(field.into()) | ||
| } | ||
|
|
||
| fn unknown(name: impl Into<String>, value: i32) -> Error { | ||
| Error::UnknownEnumVariant { | ||
| name: name.into(), | ||
|
|
@@ -559,56 +555,6 @@ impl TryFrom<&i32> for protobuf::AggregateFunction { | |
| } | ||
| } | ||
|
|
||
| impl TryFrom<&protobuf::scalar_type::Datatype> for DataType { | ||
| type Error = Error; | ||
|
|
||
| fn try_from( | ||
| scalar_type: &protobuf::scalar_type::Datatype, | ||
| ) -> Result<Self, Self::Error> { | ||
| use protobuf::scalar_type::Datatype; | ||
|
|
||
| Ok(match scalar_type { | ||
| Datatype::Scalar(scalar_type) => { | ||
| protobuf::PrimitiveScalarType::try_from(scalar_type)?.into() | ||
| } | ||
| Datatype::List(protobuf::ScalarListType { | ||
| deepest_type, | ||
| field_names, | ||
| }) => { | ||
| if field_names.is_empty() { | ||
| return Err(Error::at_least_one("field_names")); | ||
| } | ||
| let field_type = | ||
| protobuf::PrimitiveScalarType::try_from(deepest_type)?.into(); | ||
| // Because length is checked above it is safe to unwrap .last() | ||
| let mut scalar_type = DataType::List(Box::new(Field::new( | ||
| field_names.last().unwrap().as_str(), | ||
| field_type, | ||
| true, | ||
| ))); | ||
| // Iterate over field names in reverse order except for the last item in the vector | ||
| for name in field_names.iter().rev().skip(1) { | ||
| let new_datatype = DataType::List(Box::new(Field::new( | ||
| name.as_str(), | ||
| scalar_type, | ||
| true, | ||
| ))); | ||
| scalar_type = new_datatype; | ||
| } | ||
| scalar_type | ||
| } | ||
| }) | ||
| } | ||
| } | ||
|
|
||
| impl TryFrom<&protobuf::ScalarType> for DataType { | ||
| type Error = Error; | ||
|
|
||
| fn try_from(scalar: &protobuf::ScalarType) -> Result<Self, Self::Error> { | ||
| scalar.datatype.as_ref().required("datatype") | ||
| } | ||
| } | ||
|
|
||
| impl TryFrom<&protobuf::Schema> for Schema { | ||
| type Error = Error; | ||
|
|
||
|
|
@@ -676,36 +622,6 @@ impl TryFrom<&protobuf::PrimitiveScalarType> for ScalarValue { | |
| } | ||
| } | ||
|
|
||
| impl TryFrom<&protobuf::ScalarListType> for DataType { | ||
| type Error = Error; | ||
| fn try_from(scalar: &protobuf::ScalarListType) -> Result<Self, Self::Error> { | ||
| use protobuf::PrimitiveScalarType; | ||
|
|
||
| let protobuf::ScalarListType { | ||
| deepest_type, | ||
| field_names, | ||
| } = scalar; | ||
|
|
||
| let depth = field_names.len(); | ||
| if depth == 0 { | ||
| return Err(Error::at_least_one("field_names")); | ||
| } | ||
|
|
||
| let mut curr_type = Self::List(Box::new(Field::new( | ||
| // Since checked vector is not empty above this is safe to unwrap | ||
| field_names.last().unwrap(), | ||
| PrimitiveScalarType::try_from(deepest_type)?.into(), | ||
| true, | ||
| ))); | ||
| // Iterates over field names in reverse order except for the last item in the vector | ||
| for name in field_names.iter().rev().skip(1) { | ||
| let temp_curr_type = Self::List(Box::new(Field::new(name, curr_type, true))); | ||
| curr_type = temp_curr_type; | ||
| } | ||
| Ok(curr_type) | ||
| } | ||
| } | ||
|
|
||
| impl TryFrom<&protobuf::ScalarValue> for ScalarValue { | ||
| type Error = Error; | ||
|
|
||
|
|
@@ -734,23 +650,23 @@ impl TryFrom<&protobuf::ScalarValue> for ScalarValue { | |
| Value::Date32Value(v) => Self::Date32(Some(*v)), | ||
| Value::ListValue(scalar_list) => { | ||
| let protobuf::ScalarListValue { | ||
| is_null, | ||
| values, | ||
| field: opt_field, | ||
| field, | ||
| } = &scalar_list; | ||
|
|
||
| let field = opt_field.as_ref().required("field")?; | ||
| let field: Field = field.as_ref().required("field")?; | ||
| let field = Box::new(field); | ||
|
|
||
| let typechecked_values: Vec<ScalarValue> = values | ||
| .iter() | ||
| .map(|val| val.try_into()) | ||
| .collect::<Result<Vec<_>, _>>()?; | ||
| let values: Result<Vec<ScalarValue>, Error> = | ||
| values.iter().map(|val| val.try_into()).collect(); | ||
| let values = values?; | ||
|
|
||
| Self::List(Some(typechecked_values), field) | ||
| } | ||
| Value::NullListValue(v) => { | ||
| let field = Field::new("item", v.try_into()?, true); | ||
| Self::List(None, Box::new(field)) | ||
| validate_list_values(field.as_ref(), &values)?; | ||
|
|
||
| let values = if *is_null { None } else { Some(values) }; | ||
|
|
||
| Self::List(values, field) | ||
| } | ||
| Value::NullValue(v) => { | ||
| let null_type_enum = protobuf::PrimitiveScalarType::try_from(v)?; | ||
|
|
@@ -840,6 +756,23 @@ impl TryFrom<&protobuf::ScalarValue> for ScalarValue { | |
| } | ||
| } | ||
|
|
||
| /// Ensures that every value in `values` has the same data type as | ||
| /// `field` | ||
| fn validate_list_values(field: &Field, values: &[ScalarValue]) -> Result<(), Error> { | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I am not sure how valuable it is to do this validation if serialized
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It would be a weird edge case, but if it did happen it would be hard to debug from whatever error happened downstream, so I think it's worthwhile to do here. |
||
| for value in values { | ||
| let field_type = field.data_type(); | ||
| let value_type = value.get_datatype(); | ||
|
|
||
| if field_type != &value_type { | ||
| return Err(proto_error(format!( | ||
| "Expected field type {:?}, got scalar of type: {:?}", | ||
| field_type, value_type | ||
| ))); | ||
| } | ||
| } | ||
| Ok(()) | ||
| } | ||
|
|
||
| pub fn parse_expr( | ||
| proto: &protobuf::LogicalExprNode, | ||
| registry: &dyn FunctionRegistry, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I believe much of the complexity of the code to serialize List values was related to trying to figure out NULL values. Encoding it explicitly makes the code much simpler
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is a null even being encoded at this level, and not using the null_value support at the ScalarValue level?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The short answer is I don't know -- the longer answer is that the
`null_value` support at the scalar level requires a strange mirror type (PrimitiveScalarValue) which doesn't have the same support for types as the normal `DataType` (specifically it doesn't have a `List` equivalent) -- I am working in the background to remove that structure in #3612 and I will also try to remove this explicit null coding as well