-
Notifications
You must be signed in to change notification settings - Fork 330
Implement read and write functionality for JSON col #2722
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -5178,7 +5178,7 @@ private TdsOperationStatus TryProcessTypeInfo(TdsParserStateObject stateObj, Sql | |||||
| } | ||||||
|
|
||||||
| // read the collation for 7.x servers | ||||||
| if (col.metaType.IsCharType && (tdsType != TdsEnums.SQLXMLTYPE)) | ||||||
| if (col.metaType.IsCharType && (tdsType != TdsEnums.SQLXMLTYPE) && ((tdsType != TdsEnums.SQLJSON))) | ||||||
| { | ||||||
| result = TryProcessCollation(stateObj, out col.collation); | ||||||
| if (result != TdsOperationStatus.Done) | ||||||
|
|
@@ -5958,6 +5958,7 @@ private TdsOperationStatus TryReadSqlStringValue(SqlBuffer value, byte type, int | |||||
| case TdsEnums.SQLVARCHAR: | ||||||
| case TdsEnums.SQLBIGVARCHAR: | ||||||
| case TdsEnums.SQLTEXT: | ||||||
| case TdsEnums.SQLJSON: | ||||||
| // If bigvarchar(max), we only read the first chunk here, | ||||||
| // expecting the caller to read the rest | ||||||
| if (encoding == null) | ||||||
|
|
@@ -6427,6 +6428,7 @@ internal TdsOperationStatus TryReadSqlValue(SqlBuffer value, SqlMetaDataPriv md, | |||||
| case TdsEnums.SQLNCHAR: | ||||||
| case TdsEnums.SQLNVARCHAR: | ||||||
| case TdsEnums.SQLNTEXT: | ||||||
| case TdsEnums.SQLJSON: | ||||||
| result = TryReadSqlStringValue(value, tdsType, length, md.encoding, isPlp, stateObj); | ||||||
| if (result != TdsOperationStatus.Done) | ||||||
| { | ||||||
|
|
@@ -7964,6 +7966,7 @@ internal TdsOperationStatus TryGetDataLength(SqlMetaDataPriv colmeta, TdsParserS | |||||
| colmeta.tdsType == TdsEnums.SQLBIGVARCHAR || | ||||||
| colmeta.tdsType == TdsEnums.SQLBIGVARBINARY || | ||||||
| colmeta.tdsType == TdsEnums.SQLNVARCHAR || | ||||||
| colmeta.tdsType == TdsEnums.SQLJSON || | ||||||
| // Large UDTs is WinFS-only | ||||||
| colmeta.tdsType == TdsEnums.SQLUDT, | ||||||
| "GetDataLength:Invalid streaming datatype"); | ||||||
|
|
@@ -9819,7 +9822,7 @@ private Task TDSExecuteRPCAddParameter(TdsParserStateObject stateObj, SqlParamet | |||||
| } | ||||||
| else if (mt.IsPlp) | ||||||
| { | ||||||
| if (mt.SqlDbType != SqlDbType.Xml) | ||||||
| if (mt.SqlDbType != SqlDbType.Xml && mt.SqlDbType != SqlDbTypeExtensions.Json) | ||||||
| WriteShort(TdsEnums.SQL_USHORTVARMAXLEN, stateObj); | ||||||
| } | ||||||
| else if ((!mt.IsVarTime) && (mt.SqlDbType != SqlDbType.Date)) | ||||||
|
|
@@ -9859,53 +9862,56 @@ private Task TDSExecuteRPCAddParameter(TdsParserStateObject stateObj, SqlParamet | |||||
|
|
||||||
| // write out collation or xml metadata | ||||||
|
|
||||||
| if (_is2005 && (mt.SqlDbType == SqlDbType.Xml)) | ||||||
| if ((mt.SqlDbType == SqlDbType.Xml || mt.SqlDbType == SqlDbTypeExtensions.Json)) | ||||||
| { | ||||||
| if (!string.IsNullOrEmpty(param.XmlSchemaCollectionDatabase) || | ||||||
| !string.IsNullOrEmpty(param.XmlSchemaCollectionOwningSchema) || | ||||||
| !string.IsNullOrEmpty(param.XmlSchemaCollectionName)) | ||||||
| if (mt.SqlDbType == SqlDbType.Xml) | ||||||
| { | ||||||
| stateObj.WriteByte(1); //Schema present flag | ||||||
|
|
||||||
| if (!string.IsNullOrEmpty(param.XmlSchemaCollectionDatabase)) | ||||||
| if (!string.IsNullOrEmpty(param.XmlSchemaCollectionDatabase) || | ||||||
| !string.IsNullOrEmpty(param.XmlSchemaCollectionOwningSchema) || | ||||||
| !string.IsNullOrEmpty(param.XmlSchemaCollectionName)) | ||||||
| { | ||||||
| tempLen = (param.XmlSchemaCollectionDatabase).Length; | ||||||
| stateObj.WriteByte((byte)(tempLen)); | ||||||
| WriteString(param.XmlSchemaCollectionDatabase, tempLen, 0, stateObj); | ||||||
| } | ||||||
| else | ||||||
| { | ||||||
| stateObj.WriteByte(0); // No dbname | ||||||
| } | ||||||
| stateObj.WriteByte(1); //Schema present flag | ||||||
|
|
||||||
| if (!string.IsNullOrEmpty(param.XmlSchemaCollectionOwningSchema)) | ||||||
| { | ||||||
| tempLen = (param.XmlSchemaCollectionOwningSchema).Length; | ||||||
| stateObj.WriteByte((byte)(tempLen)); | ||||||
| WriteString(param.XmlSchemaCollectionOwningSchema, tempLen, 0, stateObj); | ||||||
| } | ||||||
| else | ||||||
| { | ||||||
| stateObj.WriteByte(0); // no xml schema name | ||||||
| } | ||||||
| if (!string.IsNullOrEmpty(param.XmlSchemaCollectionDatabase)) | ||||||
| { | ||||||
| tempLen = (param.XmlSchemaCollectionDatabase).Length; | ||||||
| stateObj.WriteByte((byte)(tempLen)); | ||||||
| WriteString(param.XmlSchemaCollectionDatabase, tempLen, 0, stateObj); | ||||||
| } | ||||||
| else | ||||||
| { | ||||||
| stateObj.WriteByte(0); // No dbname | ||||||
| } | ||||||
|
|
||||||
| if (!string.IsNullOrEmpty(param.XmlSchemaCollectionOwningSchema)) | ||||||
| { | ||||||
| tempLen = (param.XmlSchemaCollectionOwningSchema).Length; | ||||||
| stateObj.WriteByte((byte)(tempLen)); | ||||||
| WriteString(param.XmlSchemaCollectionOwningSchema, tempLen, 0, stateObj); | ||||||
| } | ||||||
| else | ||||||
| { | ||||||
| stateObj.WriteByte(0); // no xml schema name | ||||||
| } | ||||||
| if (!string.IsNullOrEmpty(param.XmlSchemaCollectionName)) | ||||||
| { | ||||||
| tempLen = (param.XmlSchemaCollectionName).Length; | ||||||
| WriteShort((short)(tempLen), stateObj); | ||||||
| WriteString(param.XmlSchemaCollectionName, tempLen, 0, stateObj); | ||||||
| } | ||||||
| else | ||||||
| { | ||||||
| WriteShort(0, stateObj); // No xml schema collection name | ||||||
| } | ||||||
|
|
||||||
| if (!string.IsNullOrEmpty(param.XmlSchemaCollectionName)) | ||||||
| { | ||||||
| tempLen = (param.XmlSchemaCollectionName).Length; | ||||||
| WriteShort((short)(tempLen), stateObj); | ||||||
| WriteString(param.XmlSchemaCollectionName, tempLen, 0, stateObj); | ||||||
| } | ||||||
| else | ||||||
| { | ||||||
| WriteShort(0, stateObj); // No xml schema collection name | ||||||
| stateObj.WriteByte(0); // No schema | ||||||
| } | ||||||
| } | ||||||
| else | ||||||
| { | ||||||
| stateObj.WriteByte(0); // No schema | ||||||
| } | ||||||
| } | ||||||
| else if (mt.IsCharType) | ||||||
| else if (mt.IsCharType && mt.SqlDbType != SqlDbTypeExtensions.Json) | ||||||
| { | ||||||
| // if it is not supplied, simply write out our default collation, otherwise, write out the one attached to the parameter | ||||||
| SqlCollation outCollation = (param.Collation != null) ? param.Collation : _defaultCollation; | ||||||
|
|
@@ -11485,10 +11491,18 @@ private Task WriteUnterminatedSqlValue(object value, MetaType type, int actualLe | |||||
| case TdsEnums.SQLNVARCHAR: | ||||||
| case TdsEnums.SQLNTEXT: | ||||||
| case TdsEnums.SQLXMLTYPE: | ||||||
| case TdsEnums.SQLJSON: | ||||||
|
|
||||||
| if (type.IsPlp) | ||||||
| { | ||||||
| if (IsBOMNeeded(type, value)) | ||||||
| if (type.NullableType == TdsEnums.SQLJSON) | ||||||
| { | ||||||
| // TODO : Performance and BOM check. Saurabh | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What is the work that is needed under the TODO?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @Wraith2 There are 2 ways of fixing this:
These are the solutions in my mind. However I have not looked at the encoding / text buffer APIs in detail. Any insights? This is the problem I had discussed on the call with you as well.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Definitely option 2. If we're working in netcore then we probably want to try and use https://learn.microsoft.com/en-gb/dotnet/api/system.text.unicode.utf8.fromutf16?view=net-8.0 which will allow us to use a ROS from the string to get incremental chunks of bytes from it and write to the packets directly. If we're only using ns2.0 level apis so we have netfx compatibility then sadly we're stuck with a UTF8 Encoder instance https://learn.microsoft.com/en-gb/dotnet/api/system.text.encoder.getbytes?view=net-8.0 which can be a little complicated to work with correctly.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I'm probably missing context/TDS knowledge here, but PLP seems ideal for large string data, no? You can have a single byte[] buffer, and repeatedly use Encoder.GetBytes (or Utf8.FromUtf16) to write out a chunk of the string into the buffer, prefixing it with the chunk length and sending that chunk out, no? PLP is one thing in TDS which (if I understand it correctly) is missing in the PG protocol: everything has to be fully length-prefixed, so when sending a long string, Npgsql has to either do a full pass just to calculate the exact byte length in advance, or continuously allocate more memory as we're encoding, which means arbitrarily-large memory requirements (not good). The ability to send a huge value in multiple chunks - each length-prefixed and without having to prefix the whole thing with a length - seems very useful here.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, PLP is really useful for string data, but the packetization requirements of TDS also make it a bit tricky. Any UTF-16 codepoint can encode to 1..3 output bytes, and typically if a 3-byte output is encountered and doesn't fit entirely in the output buffer it isn't output, and the function would return only complete encodings. This is a problem because TDS packets must be filled to the last byte, so you need to be able to get partial UTF sequences. I was talking to @GrabYourPitchforks about this on Discord yesterday because I wanted to make sure my recommendations were correct. The best idea that was suggested was to use the Convert function and pass it an output buffer specifically of (space in packet + 3) bytes; this ensures that if any multibyte sequences are present they will be fully present in the output, and we can then handle taking the bytes we need and temporarily storing the remaining parts of the encoded output until the next packet.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for the context @Wraith2, that helps! But doesn't Encoder.GetBytes() exist exactly for this kind of thing? In other words, if there are only two bytes left in the packet buffer and the next character needs 3, wouldn't it populate 2 bytes and allow you to write the 3rd byte in the next one, after having sent the packet?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes. That's why I suggested using an Encoder instance. It was pointed out to me that using Convert may not need an instance though which saves an allocation.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Makes sense, thanks. Worst case, keeping an Encoder instance per physical connection and reusing it should also be fine (as I'm assuming no need for concurrent writing).
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks like we have a path forward. @apoorvdeshmukh can you open a followup issue to tackle this performance improvement and add the link in the comment. We can tackle it in the next wave of change for JSON.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Raised issue #2732 to track this. |
||||||
| byte[] jsonAsBytes = Encoding.UTF8.GetBytes(value.ToString()); | ||||||
| WriteInt(jsonAsBytes.Length, stateObj); | ||||||
| return stateObj.WriteByteArray(jsonAsBytes, jsonAsBytes.Length, 0, canAccumulate: false); | ||||||
| } | ||||||
| else if (IsBOMNeeded(type, value)) | ||||||
| { | ||||||
| WriteInt(actualLength + 2, stateObj); // chunk length | ||||||
| WriteShort(TdsEnums.XMLUNICODEBOM, stateObj); | ||||||
|
|
@@ -12135,6 +12149,7 @@ private Task WriteUnterminatedValue(object value, MetaType type, byte scale, int | |||||
| case TdsEnums.SQLNVARCHAR: | ||||||
| case TdsEnums.SQLNTEXT: | ||||||
| case TdsEnums.SQLXMLTYPE: | ||||||
| case TdsEnums.SQLJSON: | ||||||
| { | ||||||
| Debug.Assert(!isDataFeed || (value is TextDataFeed || value is XmlDataFeed), "Value must be a TextReader or XmlReader"); | ||||||
| Debug.Assert(isDataFeed || (value is string || value is byte[]), "Value is a byte array or string"); | ||||||
|
|
@@ -12156,7 +12171,14 @@ private Task WriteUnterminatedValue(object value, MetaType type, byte scale, int | |||||
| { | ||||||
| if (type.IsPlp) | ||||||
| { | ||||||
| if (IsBOMNeeded(type, value)) | ||||||
| if (type.NullableType == TdsEnums.SQLJSON) | ||||||
| { | ||||||
| // TODO : Performance and BOM check. Saurabh | ||||||
| byte[] jsonAsBytes = Encoding.UTF8.GetBytes((string)value); | ||||||
| WriteInt(jsonAsBytes.Length, stateObj); | ||||||
| return stateObj.WriteByteArray(jsonAsBytes, jsonAsBytes.Length, 0, canAccumulate: false); | ||||||
| } | ||||||
| else if (IsBOMNeeded(type, value)) | ||||||
| { | ||||||
| WriteInt(actualLength + 2, stateObj); // chunk length | ||||||
| WriteShort(TdsEnums.XMLUNICODEBOM, stateObj); | ||||||
|
|
@@ -12662,7 +12684,7 @@ internal void WriteParameterVarLen(MetaType type, int size, bool isNull, TdsPars | |||||
| WriteInt(unchecked((int)TdsEnums.VARLONGNULL), stateObj); | ||||||
| } | ||||||
| } | ||||||
| else if (type.NullableType == TdsEnums.SQLXMLTYPE || unknownLength) | ||||||
| else if (type.NullableType is TdsEnums.SQLXMLTYPE or TdsEnums.SQLJSON || unknownLength) | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| { | ||||||
| WriteUnsignedLong(TdsEnums.SQL_PLP_UNKNOWNLEN, stateObj); | ||||||
| } | ||||||
|
|
||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is needed for data writes, isn't it?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes. I included this change as it was needed as a fix for reads to work on top of write changes.