@smitpatel ran a few custom batching benchmarks recently that showed interesting results. His benchmarks focus on INSERT batching for the same entity, but a similar analysis could be performed for other operations.
The test inserts 1000 rows on a table in SQL Server. There are two separate series, one for a local instance and one for a remote instance. The chart shows the time elapsed (on Y) for different batch sizes (on X):

To make sense of this chart, keep in mind that with a maximum batch size of 1 there is no batching, and we resort to different SQL that has a smaller fixed cost.
There are a few interesting things to observe:
- For a local instance (i.e. low latency) the benefits of batching are very limited, and in fact there only seems to be some improvement for batch sizes between 20 and 30.
- For both local and remote instances, things start to get worse after a batch size of 40.
- A batch size of 2 seems to be a very bad idea, even for a remote (i.e. higher latency) instance.
Possible conclusions and follow up actions:
- Currently if batch size is not specified we default to large batches limited only by the maximum number of parameters for a SQL statement which is 2100. It seems that we should pick a smaller default, e.g. 20.
- There seems to be potential value in a minimum batch size setting, e.g. do not do batching unless there can be benefit. The threshold for the remote run seems to be 4, and for the local run closer to 20.
- We should try to understand how this behaves for other operations.
- We should try to understand how this is sensitive to number and types of columns, and in general to the size of the data.
- If possible we should experiment a bit with concatenated INSERTs as a strategy for batching INSERTs and compare the results.
- If possible we should experiment with higher latency database connections. Our current “remote” is not very remote.
- Ultimately it would be cool if batch size could be adaptive 😄
The code of the test follows:
using System;
using System.Collections.Generic;
using System.Diagnostics;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Logging;
namespace ConsoleApp6
{
class Program
{
    /// <summary>
    /// Entry point: prints a startup greeting, then runs the INSERT-batching benchmark.
    /// </summary>
    static void Main(string[] args)
    {
        Console.WriteLine("Hello World!");

        var benchmark = new Test();
        benchmark.Run();
    }
}
public class Test
{
    protected OrdersContext Context;

    // FIX: the original connection string specified "Database=" twice
    // ("Benchmarks" and "Perf_UpdatePipeline_Simple"). SqlClient lets the
    // last occurrence win, so only the effective database name is kept.
    protected string ConnectionString = "Server=(localdb)\\mssqllocaldb;Trusted_Connection=True;Database=Perf_UpdatePipeline_Simple";

    /// <summary>
    /// Runs the INSERT-batching benchmark. For each MaxBatchSize value from
    /// 0 (0 means "use the provider default") up to 149, it inserts 1000
    /// customers twice and reports the elapsed SaveChanges time of the
    /// second (warmed-up) pass. Each pass runs inside a transaction that is
    /// disposed without committing, so the inserts are rolled back and every
    /// iteration starts from the same table state.
    /// </summary>
    public void Run()
    {
        var stopwatch = new Stopwatch();
        for (var i = 0; i < 150; i++)
        {
            // j == 0 is a warm-up pass; only the second pass (j == 1) is printed.
            for (var j = 0; j < 2; j++)
            {
                // FIX: context and transaction are now disposed deterministically
                // via using blocks, even if SaveChanges throws. Disposing the
                // uncommitted transaction rolls the inserts back (as before).
                using (Context = CreateContext(i))
                using (Context.Database.BeginTransaction())
                {
                    var customers = CreateCustomers(1000, setPrimaryKeys: false);
                    Context.Customers.AddRange(customers);

                    // FIX: Restart() replaces the Reset()+Start() pair.
                    stopwatch.Restart();
                    Context.SaveChanges();
                    stopwatch.Stop();

                    if (j != 0)
                    {
                        Console.WriteLine($"BatchSize={i}=>{stopwatch.Elapsed}");
                    }
                }
            }
        }
    }

    /// <summary>Creates a context configured with the given maximum batch size (0 = provider default).</summary>
    public virtual OrdersContext CreateContext(int batchSize) => new OrdersContext(ConnectionString, batchSize);

    /// <summary>
    /// Benchmark DbContext. Allows the SQL Server maximum batch size to be
    /// fixed per instance, or batching to be disabled entirely.
    /// </summary>
    public class OrdersContext : DbContext
    {
        private readonly string _connectionString;
        private readonly bool _disableBatching;
        private readonly int _batchSize;

        public OrdersContext(string connectionString, bool disableBatching = false)
        {
            _connectionString = connectionString;
            _disableBatching = disableBatching;
        }

        public OrdersContext(string connectionString, int batchSize)
        {
            _connectionString = connectionString;
            _batchSize = batchSize;
        }

        public DbSet<Customer> Customers { get; set; }
        public DbSet<Order> Orders { get; set; }

        protected override void OnConfiguring(DbContextOptionsBuilder optionsBuilder)
            => optionsBuilder
                .UseSqlServer(
                    _connectionString,
                    b =>
                    {
                        if (_disableBatching)
                        {
                            // FIX: _disableBatching was previously stored but never
                            // read, so the disableBatching constructor flag silently
                            // did nothing. A max batch size of 1 disables batching.
                            b.MaxBatchSize(1);
                        }
                        else if (_batchSize != 0)
                        {
                            // 0 means "not specified": leave the provider default.
                            b.MaxBatchSize(_batchSize);
                        }
                    });
    }

    /// <summary>
    /// Builds <paramref name="customerCount"/> customers with deterministic,
    /// synthetic data. When <paramref name="setPrimaryKeys"/> is false the keys
    /// are left at 0 so the store generates them.
    /// </summary>
    public virtual List<Customer> CreateCustomers(int customerCount, bool setPrimaryKeys)
    {
        var customers = new List<Customer>(customerCount);
        for (var c = 0; c < customerCount; c++)
        {
            customers.Add(
                new Customer
                {
                    CustomerId = setPrimaryKeys ? c + 1 : 0,
                    Title = c % 2 == 0 ? "Mr" : "Mrs",
                    FirstName = "Customer " + c,
                    LastName = "Customer " + c,
                    DateOfBirth = new DateTime(1980, c % 12 + 1, 1),
                    IsLoyaltyMember = c % 3 == 0,
                    Joined = new DateTime(2000, c % 12 + 1, 1),
                    OptedOutOfMarketing = c % 7 == 0,
                    Phone = "555-555-5555",
                    Email = $"customer{c}@sample.com",
                    AddressLineOne = $"{c} Sample St",
                    City = "Sampleville",
                    StateOrProvince = "SMP",
                    ZipOrPostalCode = "00000",
                    Country = "United States"
                });
        }
        return customers;
    }

    /// <summary>
    /// Builds <paramref name="ordersPerCustomer"/> orders for each customer,
    /// with deterministic, synthetic data. When <paramref name="setPrimaryKeys"/>
    /// is false the keys are left at 0 so the store generates them.
    /// </summary>
    public virtual List<Order> CreateOrders(List<Customer> customers, int ordersPerCustomer, bool setPrimaryKeys)
    {
        var orders = new List<Order>(customers.Count * ordersPerCustomer);
        for (var c = 0; c < customers.Count; c++)
        {
            for (var i = 0; i < ordersPerCustomer; i++)
            {
                orders.Add(
                    new Order
                    {
                        OrderId = setPrimaryKeys ? c * ordersPerCustomer + i + 1 : 0,
                        CustomerId = customers[c].CustomerId,
                        Date = new DateTime(2000, 1, 1),
                        OrderDiscount = i % 3,
                        DiscountReason = i % 3 == 0 ? null : "They seemed nice",
                        Tax = i % 10,
                        Addressee = "Person " + i,
                        AddressLineOne = $"{i} Sample St",
                        City = "Sampleville",
                        StateOrProvince = "SMP",
                        ZipOrPostalCode = "00000",
                        Country = "United States"
                    });
            }
        }
        return orders;
    }

    /// <summary>Order entity used by the benchmark schema.</summary>
    public class Order
    {
        public int OrderId { get; set; }
        public DateTime Date { get; set; }
        public string SpecialRequests { get; set; }
        public decimal OrderDiscount { get; set; }
        public string DiscountReason { get; set; }
        public decimal Tax { get; set; }
        public string Addressee { get; set; }
        public string AddressLineOne { get; set; }
        public string AddressLineTwo { get; set; }
        public string City { get; set; }
        public string StateOrProvince { get; set; }
        public string ZipOrPostalCode { get; set; }
        public string Country { get; set; }
        public int CustomerId { get; set; }
        public Customer Customer { get; set; }
    }

    /// <summary>Customer entity used by the benchmark schema.</summary>
    public class Customer
    {
        public int CustomerId { get; set; }
        public string Title { get; set; }
        public string FirstName { get; set; }
        public string LastName { get; set; }
        public DateTime? DateOfBirth { get; set; }
        public bool IsLoyaltyMember { get; set; }
        public DateTime Joined { get; set; }
        public bool OptedOutOfMarketing { get; set; }
        public string Phone { get; set; }
        public string Email { get; set; }
        public string AddressLineOne { get; set; }
        public string AddressLineTwo { get; set; }
        public string City { get; set; }
        public string StateOrProvince { get; set; }
        public string ZipOrPostalCode { get; set; }
        public string Country { get; set; }
        public ICollection<Order> Orders { get; set; }
    }
}
}
@smitpatel ran a few custom batching benchmarks recently that showed interesting results. His benchmarks focus on INSERT batching for the same entity, but a similar analysis could be performed for other operations.
The test inserts 1000 rows on a table in SQL Server. There are two separate series, one for a local instance and one for a remote instance. The chart shows the time elapsed (on Y) for different batch sizes (on X):
To make sense of this chart, keep in mind that with a maximum batch size of 1 there is no batching, and we resort to different SQL that has a smaller fixed cost.
There are a few interesting things to observe:
Possible conclusions and follow up actions:
The code of the test follows: