Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -958,7 +958,7 @@
</ItemGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Data.SqlClient.SNI.runtime" Version="$(MicrosoftDataSqlClientSNIRuntimeVersion)" />
<PackageReference Include="Microsoft.Extensions.Caching.Memory" Version="$(MicrosoftExtensionsCachingMemoryVersion)" />
<PackageReference Include="Microsoft.Extensions.Caching.Memory" Version="$(MicrosoftExtensionsCachingMemoryVersion)" />
<!-- Enable the project reference for debugging purposes. -->
<!-- <ProjectReference Include="$(SqlServerSourceCode)\Microsoft.SqlServer.Server.csproj" /> -->
<PackageReference Include="Microsoft.SqlServer.Server" Version="$(MicrosoftSqlServerServerVersion)" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6471,7 +6471,7 @@ internal string BuildParamList(TdsParser parser, SqlParameterCollection paramete
paramList.Append(size);
paramList.Append(')');
}
else if (mt.IsPlp && (mt.SqlDbType != SqlDbType.Xml) && (mt.SqlDbType != SqlDbType.Udt))
else if (mt.IsPlp && (mt.SqlDbType != SqlDbType.Xml) && (mt.SqlDbType != SqlDbType.Udt) && (mt.SqlDbType != SqlDbTypeExtensions.Json))
{
paramList.Append("(max) ");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5178,7 +5178,7 @@ private TdsOperationStatus TryProcessTypeInfo(TdsParserStateObject stateObj, Sql
}

// read the collation for 7.x servers
if (col.metaType.IsCharType && (tdsType != TdsEnums.SQLXMLTYPE))
if (col.metaType.IsCharType && (tdsType != TdsEnums.SQLXMLTYPE) && ((tdsType != TdsEnums.SQLJSON)))
{
result = TryProcessCollation(stateObj, out col.collation);
if (result != TdsOperationStatus.Done)
Expand Down Expand Up @@ -5958,6 +5958,7 @@ private TdsOperationStatus TryReadSqlStringValue(SqlBuffer value, byte type, int
case TdsEnums.SQLVARCHAR:
case TdsEnums.SQLBIGVARCHAR:
case TdsEnums.SQLTEXT:
case TdsEnums.SQLJSON:
// If bigvarchar(max), we only read the first chunk here,
// expecting the caller to read the rest
if (encoding == null)
Expand Down Expand Up @@ -6427,6 +6428,7 @@ internal TdsOperationStatus TryReadSqlValue(SqlBuffer value, SqlMetaDataPriv md,
case TdsEnums.SQLNCHAR:
case TdsEnums.SQLNVARCHAR:
case TdsEnums.SQLNTEXT:
case TdsEnums.SQLJSON:
result = TryReadSqlStringValue(value, tdsType, length, md.encoding, isPlp, stateObj);
if (result != TdsOperationStatus.Done)
{
Expand Down Expand Up @@ -7964,6 +7966,7 @@ internal TdsOperationStatus TryGetDataLength(SqlMetaDataPriv colmeta, TdsParserS
colmeta.tdsType == TdsEnums.SQLBIGVARCHAR ||
colmeta.tdsType == TdsEnums.SQLBIGVARBINARY ||
colmeta.tdsType == TdsEnums.SQLNVARCHAR ||
colmeta.tdsType == TdsEnums.SQLJSON ||
// Large UDTs is WinFS-only
colmeta.tdsType == TdsEnums.SQLUDT,
"GetDataLength:Invalid streaming datatype");
Expand Down Expand Up @@ -9819,7 +9822,7 @@ private Task TDSExecuteRPCAddParameter(TdsParserStateObject stateObj, SqlParamet
}
else if (mt.IsPlp)
{
if (mt.SqlDbType != SqlDbType.Xml)
if (mt.SqlDbType != SqlDbType.Xml && mt.SqlDbType != SqlDbTypeExtensions.Json)
WriteShort(TdsEnums.SQL_USHORTVARMAXLEN, stateObj);
}
else if ((!mt.IsVarTime) && (mt.SqlDbType != SqlDbType.Date))
Expand Down Expand Up @@ -9859,53 +9862,56 @@ private Task TDSExecuteRPCAddParameter(TdsParserStateObject stateObj, SqlParamet

// write out collation or xml metadata

if (_is2005 && (mt.SqlDbType == SqlDbType.Xml))
if ((mt.SqlDbType == SqlDbType.Xml || mt.SqlDbType == SqlDbTypeExtensions.Json))
{
if (!string.IsNullOrEmpty(param.XmlSchemaCollectionDatabase) ||
!string.IsNullOrEmpty(param.XmlSchemaCollectionOwningSchema) ||
!string.IsNullOrEmpty(param.XmlSchemaCollectionName))
if (mt.SqlDbType == SqlDbType.Xml)
{
stateObj.WriteByte(1); //Schema present flag

if (!string.IsNullOrEmpty(param.XmlSchemaCollectionDatabase))
if (!string.IsNullOrEmpty(param.XmlSchemaCollectionDatabase) ||
!string.IsNullOrEmpty(param.XmlSchemaCollectionOwningSchema) ||
!string.IsNullOrEmpty(param.XmlSchemaCollectionName))
{
tempLen = (param.XmlSchemaCollectionDatabase).Length;
stateObj.WriteByte((byte)(tempLen));
WriteString(param.XmlSchemaCollectionDatabase, tempLen, 0, stateObj);
}
else
{
stateObj.WriteByte(0); // No dbname
}
stateObj.WriteByte(1); //Schema present flag

if (!string.IsNullOrEmpty(param.XmlSchemaCollectionOwningSchema))
{
tempLen = (param.XmlSchemaCollectionOwningSchema).Length;
stateObj.WriteByte((byte)(tempLen));
WriteString(param.XmlSchemaCollectionOwningSchema, tempLen, 0, stateObj);
}
else
{
stateObj.WriteByte(0); // no xml schema name
}
if (!string.IsNullOrEmpty(param.XmlSchemaCollectionDatabase))
{
tempLen = (param.XmlSchemaCollectionDatabase).Length;
stateObj.WriteByte((byte)(tempLen));
WriteString(param.XmlSchemaCollectionDatabase, tempLen, 0, stateObj);
}
else
{
stateObj.WriteByte(0); // No dbname
}

if (!string.IsNullOrEmpty(param.XmlSchemaCollectionOwningSchema))
{
tempLen = (param.XmlSchemaCollectionOwningSchema).Length;
stateObj.WriteByte((byte)(tempLen));
WriteString(param.XmlSchemaCollectionOwningSchema, tempLen, 0, stateObj);
}
else
{
stateObj.WriteByte(0); // no xml schema name
}
if (!string.IsNullOrEmpty(param.XmlSchemaCollectionName))
{
tempLen = (param.XmlSchemaCollectionName).Length;
WriteShort((short)(tempLen), stateObj);
WriteString(param.XmlSchemaCollectionName, tempLen, 0, stateObj);
}
else
{
WriteShort(0, stateObj); // No xml schema collection name
}

if (!string.IsNullOrEmpty(param.XmlSchemaCollectionName))
{
tempLen = (param.XmlSchemaCollectionName).Length;
WriteShort((short)(tempLen), stateObj);
WriteString(param.XmlSchemaCollectionName, tempLen, 0, stateObj);
}
else
{
WriteShort(0, stateObj); // No xml schema collection name
stateObj.WriteByte(0); // No schema
}
}
else
{
stateObj.WriteByte(0); // No schema
}
}
else if (mt.IsCharType)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is needed for data writes, isn't it?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. I included this change as it was needed as a fix for reads to work on top of write changes.

else if (mt.IsCharType && mt.SqlDbType != SqlDbTypeExtensions.Json)
{
// if it is not supplied, simply write out our default collation, otherwise, write out the one attached to the parameter
SqlCollation outCollation = (param.Collation != null) ? param.Collation : _defaultCollation;
Expand Down Expand Up @@ -11485,10 +11491,18 @@ private Task WriteUnterminatedSqlValue(object value, MetaType type, int actualLe
case TdsEnums.SQLNVARCHAR:
case TdsEnums.SQLNTEXT:
case TdsEnums.SQLXMLTYPE:
case TdsEnums.SQLJSON:

if (type.IsPlp)
{
if (IsBOMNeeded(type, value))
if (type.NullableType == TdsEnums.SQLJSON)
{
// TODO : Performance and BOM check. Saurabh
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the work that is needed under the TODO?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Wraith2
Problem: A string, small or large, is being converted to a byte array causing allocation.
That byte array is then written to network.

2 ways of fixing this:

  1. Send this out as Plp chunks. Say some chunk, smaller than the packetsize, write the Plp chunk length, write the string bytes, rinse and repeat till all the bytes can be written.
  2. Do not allocate the byte[] to send out the string. Get the length, write it, and then based on the packet size, write out the UTF8 encoded bytes of the string using the Encoding/Text buffer APIs, so that the string is directly translated to the _outBuff, instead of the intermediate allocation.

These are the solutions in my mind. However I have not looked at the encoding / text buffer APIs in detail.

Any insights? This is the problem I had discussed on the call with you as well.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Definitely option 2.
Getting the length to write it and then getting the bytes again is a bit wasteful but it's better than assuming we can buffer the input string and the bytes in memory at the same time. Consider a containerized scenario which has low memory working with large strings of json.

If we're working in netcore then we probably want to try and use https://learn.microsoft.com/en-gb/dotnet/api/system.text.unicode.utf8.fromutf16?view=net-8.0 which will allow us to use a ROS from the string to get incremental chunks of bytes from it and write to the packets directly.

If we're only using ns2.0 level apis so we have netfx compatibility then sadly we're stuck with a UTF8 Encoder instance https://learn.microsoft.com/en-gb/dotnet/api/system.text.encoder.getbytes?view=net-8.0 which can be a little complicated to work with correctly.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Definitely option 2.
Getting the length to write it and then getting the bytes again is a bit wasteful but it's better than assuming we can buffer the input string and the bytes in memory at the same time. Consider a containerized scenario which has low memory working with large strings of json.

I'm probably missing context/TDS knowledge here, but PLP seems ideal for large string data, no? You can have a single byte[] buffer, and repeatedly use Encoder.GetBytes (or Utf8.FromUtf16) to write out a chunk of the string into the buffer, prefixing it with the chunk length and sending that chunk out, no?

PLP is one thing in TDS which (if I understand it correctly) is missing in the PG protocol: everything has to be fully length-prefixed, so when sending a long string, Npgsql has to either do a full pass just to calculate the exact byte length in advance, or continuously allocate more memory as we're encoding, which means arbitrarily-large memory requirements (not good). The ability to send a huge value in multiple chunks - each length-prefixed and without having to prefix the whole thing with a length - seems very useful here.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, PLP is really useful for string data, but the packetization requirements of TDS also make it a bit tricky. Any UTF-16 codepoint can encode to 1–3 output bytes, and typically if a 3-byte output is encountered and doesn't fit entirely in the output buffer it isn't emitted, and the function returns only complete encodings. This is a problem because TDS packets must be filled to the last byte, so you need to be able to get partial UTF sequences.

I was talking to @GrabYourPitchforks about this on discord yesterday because I wanted to make sure my recommendations were correct. The best idea that was suggested was to use the Convert function and pass it an output buffer specifically of (space in packet + 3) bytes, this ensures that if any multibyte sequences are present they will be fully present in the output and we can then handle taking the bytes we need and temporarily storing the remaining parts of the encoded output until the next packet.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the context @Wraith2, that helps! But doesn't Encoder.GetBytes() exist exactly for this kind of thing? In other words, if there are only two bytes left in the packet buffer and the next character needs 3, wouldn't it populate 2 bytes and allow you to write the 3rd byte in the next one, after having sent the packet?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. That's why I suggested using an Encoder instance. It was pointed out to me that using Convert may not need an instance though which saves an allocation.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense, thanks. Worst case, keeping an Encoder instance per physical connection and reusing it should also be fine (as I'm assuming no need for concurrent writing).

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like we have a path forward.

@apoorvdeshmukh can you open a followup issue to tackle this performance improvement and add the link in the comment. We can tackle it in the next wave of change for JSON.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Raised issue #2732 to track this.

byte[] jsonAsBytes = Encoding.UTF8.GetBytes(value.ToString());
WriteInt(jsonAsBytes.Length, stateObj);
return stateObj.WriteByteArray(jsonAsBytes, jsonAsBytes.Length, 0, canAccumulate: false);
}
else if (IsBOMNeeded(type, value))
{
WriteInt(actualLength + 2, stateObj); // chunk length
WriteShort(TdsEnums.XMLUNICODEBOM, stateObj);
Expand Down Expand Up @@ -12135,6 +12149,7 @@ private Task WriteUnterminatedValue(object value, MetaType type, byte scale, int
case TdsEnums.SQLNVARCHAR:
case TdsEnums.SQLNTEXT:
case TdsEnums.SQLXMLTYPE:
case TdsEnums.SQLJSON:
{
Debug.Assert(!isDataFeed || (value is TextDataFeed || value is XmlDataFeed), "Value must be a TextReader or XmlReader");
Debug.Assert(isDataFeed || (value is string || value is byte[]), "Value is a byte array or string");
Expand All @@ -12156,7 +12171,14 @@ private Task WriteUnterminatedValue(object value, MetaType type, byte scale, int
{
if (type.IsPlp)
{
if (IsBOMNeeded(type, value))
if (type.NullableType == TdsEnums.SQLJSON)
{
// TODO : Performance and BOM check. Saurabh
byte[] jsonAsBytes = Encoding.UTF8.GetBytes((string)value);
WriteInt(jsonAsBytes.Length, stateObj);
return stateObj.WriteByteArray(jsonAsBytes, jsonAsBytes.Length, 0, canAccumulate: false);
}
else if (IsBOMNeeded(type, value))
{
WriteInt(actualLength + 2, stateObj); // chunk length
WriteShort(TdsEnums.XMLUNICODEBOM, stateObj);
Expand Down Expand Up @@ -12662,7 +12684,7 @@ internal void WriteParameterVarLen(MetaType type, int size, bool isNull, TdsPars
WriteInt(unchecked((int)TdsEnums.VARLONGNULL), stateObj);
}
}
else if (type.NullableType == TdsEnums.SQLXMLTYPE || unknownLength)
else if (type.NullableType is TdsEnums.SQLXMLTYPE or TdsEnums.SQLJSON || unknownLength)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
else if (type.NullableType is TdsEnums.SQLXMLTYPE or TdsEnums.SQLJSON || unknownLength)
else if (type.NullableType is TdsEnums.SQLXMLTYPE or TdsEnums.SQLJSON or unknownLength)

{
WriteUnsignedLong(TdsEnums.SQL_PLP_UNKNOWNLEN, stateObj);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -756,7 +756,7 @@
</COMReference>
</ItemGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Caching.Memory" Version="$(MicrosoftExtensionsCachingMemoryVersion)" />
<PackageReference Include="Microsoft.Extensions.Caching.Memory" Version="$(MicrosoftExtensionsCachingMemoryVersion)" />
<PackageReference Include="System.Text.Encodings.Web">
<Version>$(SystemTextEncodingsWebVersion)</Version>
</PackageReference>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7180,7 +7180,7 @@ internal string BuildParamList(TdsParser parser, SqlParameterCollection paramete
paramList.Append(size);
paramList.Append(')');
}
else if (mt.IsPlp && (mt.SqlDbType != SqlDbType.Xml) && (mt.SqlDbType != SqlDbType.Udt))
else if (mt.IsPlp && (mt.SqlDbType != SqlDbType.Xml) && (mt.SqlDbType != SqlDbType.Udt) && (mt.SqlDbType != SqlDbTypeExtensions.Json))
{
paramList.Append("(max) ");
}
Expand Down
Loading