Suspension/resumption for runtime async is still not as optimized as it could be, and it shows up as a regression when comparing runtime async to async1. When little work happens in user code, the overhead of the suspension/resumption machinery dominates, and this machinery is more costly for runtime async than for async1.
For example, this benchmark does practically nothing except suspend/resume:
using System;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Threading;
using System.Threading.Tasks;
namespace OSRPerf;
// Micro-benchmark whose async method does nothing but suspend and resume.
// Main drives each task to completion by hand, invoking the continuation
// that NullAwaiter captured on every suspension.
public class Program
{
// Runs a batch of short calls to Foo, then 50 long calls whose elapsed
// time Foo prints itself.
static void Main()
{
// A single awaiter instance is shared by every call to Foo.
NullAwaiter na = new NullAwaiter();
// Short runs: 10 rounds of 500 calls with n = 20. Foo prints nothing for
// these because n is not greater than 1000.
for (int i = 0; i < 10; i++)
{
for (int j = 0; j < 500; j++)
{
Task t = Foo(20, na);
// Each Continue() call invokes the continuation stored by the most recent
// suspension; repeat until the task reports completion.
while (!t.IsCompleted)
{
na.Continue();
}
}
// NOTE(review): presumably pauses so background tiered compilation can
// finish between rounds - confirm.
Thread.Sleep(100);
}
// Long runs: 50 calls with n = 10,000,000; each prints its own timing.
for (int i = 0; i < 50; i++)
{
Task t = Foo(10_000_000, na);
while (!t.IsCompleted)
{
na.Continue();
}
}
}
// Accumulator written by the first loop in Foo.
static int s_value;
// Awaits 'na' n times and, when n > 1000, prints how long the await loop took.
static async Task Foo(int n, NullAwaiter na)
{
// Untimed synchronous loop that runs before the stopwatch is started.
// NOTE(review): purpose is not stated; given the OSRPerf namespace it
// presumably exists to exercise on-stack replacement - confirm.
for (int i = 0; i < n; i++)
{
s_value += i;
}
// The stopwatch starts here, so only the await loop below is measured.
Stopwatch timer = Stopwatch.StartNew();
for (int i = 0; i < n; i++)
{
// NullAwaiter.IsCompleted is always false, so this await is never
// already complete.
await na;
}
// Report only for large n; the n = 20 calls from Main stay silent.
if (n > 1000)
Console.WriteLine("Took {0:F1} ms", timer.Elapsed.TotalMilliseconds);
}
// Awaitable that is its own awaiter. It never reports completion and only
// records the continuation it is handed, leaving the caller to invoke it.
private class NullAwaiter : ICriticalNotifyCompletion
{
// Most recently stored continuation; Main invokes it to resume Foo.
public Action Continue;
// Returns this same instance, so the object is both awaitable and awaiter.
public NullAwaiter GetAwaiter() => this;
// Always false.
public bool IsCompleted => false;
// No result to produce; intentionally empty.
public void GetResult()
{
}
// Stores the continuation, overwriting any previous one. Nothing is
// scheduled here.
public void UnsafeOnCompleted(Action continuation)
{
Continue = continuation;
}
// Not supported; always throws NotImplementedException.
public void OnCompleted(Action continuation)
{
throw new NotImplementedException();
}
}
}
Runtime async: Took 351.1 ms
Async1: Took 210.0 ms
Suspension/resumption for runtime async is still not as optimized as it could be, and it shows up as a regression when comparing runtime async to async1. When little work happens in user code, the overhead of the suspension/resumption machinery dominates, and this machinery is more costly for runtime async than for async1.
For example, this benchmark does practically nothing except suspend/resume:
Runtime async: Took 351.1 ms
Async1: Took 210.0 ms