SQL Server has a limitation, where it doesn't allow aggregate functions invocations over other aggregate functions, or over subqueries; it errors with: "Cannot perform an aggregate function on an expression containing an aggregate or a subquery".
Examples:
// Aggregate over scalar subquery
_ = await context.Blogs
.Select(b => new
{
b.Category,
FirstPostViews = b.Posts.OrderBy(p => p.Id).FirstOrDefault()!.Views
})
.GroupBy(x => x.Category)
.Select(g => g.Sum(x => x.FirstPostViews))
.ToListAsync();
// Aggregate over aggregate
_ = await context.Blogs
.Select(b => new
{
b.Category,
FirstPostViews = b.Posts.Sum(p => p.Views)
})
.GroupBy(x => x.Category)
.Select(g => g.Sum(x => x.FirstPostViews))
.ToListAsync();
The invalid SQL looks like this:
SELECT SUM((
SELECT TOP(1) [p].[Views]
FROM [Post] AS [p]
WHERE [b].[Id] = [p].[BlogId]
ORDER BY [p].[Id]))
FROM [Blogs] AS [b]
GROUP BY [b].[Category];
SELECT SUM((
SELECT SUM([p].[Views])
FROM [Post] AS [p]
WHERE [b].[Id] = [p].[BlogId]))
FROM [Blogs] AS [b]
GROUP BY [b].[Category]
We should be able to have a post-processing step that lifts the subquery out of the projection, and integrates it into the SELECT expression as a OUTER APPLY (or CROSS JOIN, if uncorrelated):
SELECT SUM(x.Views)
FROM [Blogs] AS [b]
OUTER APPLY (
SELECT TOP(1) [p].[Views]
FROM [Post] AS [p]
WHERE [b].[Id] = [p].[BlogId]
ORDER BY [p].[Id]
) AS [x]
GROUP BY [b].[Category];
SELECT SUM([x].[ViewSum])
FROM [Blogs] AS [b]
OUTER APPLY (
SELECT SUM([p].[Views]) AS [ViewSum]
FROM [Post] AS [p]
WHERE [b].[Id] = [p].[BlogId]) AS [x]
GROUP BY [b].[Category]
Note that this affects non-GroupBy queries as well.
Full code
await using var context = new BlogContext();
await context.Database.EnsureDeletedAsync();
await context.Database.EnsureCreatedAsync();
// Aggregate over scalar subquery
_ = await context.Blogs
.Select(b => new
{
b.Category,
FirstPostViews = b.Posts.OrderBy(p => p.Id).FirstOrDefault()!.Views
})
.GroupBy(x => x.Category)
.Select(g => g.Sum(x => x.FirstPostViews))
.ToListAsync();
// Aggregate over aggregate
_ = await context.Blogs
.Select(b => new
{
b.Category,
FirstPostViews = b.Posts.Sum(p => p.Views)
})
.GroupBy(x => x.Category)
.Select(g => g.Sum(x => x.FirstPostViews))
.ToListAsync();
public class BlogContext : DbContext
{
public DbSet<Blog> Blogs { get; set; }
protected override void OnConfiguring(DbContextOptionsBuilder optionsBuilder)
=> optionsBuilder
.UseSqlServer("Server=localhost;Database=test;User=SA;Password=Abcd5678;Connect Timeout=60;ConnectRetryCount=0;Encrypt=false")
.LogTo(Console.WriteLine, LogLevel.Information)
.EnableSensitiveDataLogging();
}
public class Blog
{
public int Id { get; set; }
public string Category { get; set; }
public List<Post> Posts { get; set; }
}
public class Post
{
public int Id { get; set; }
public int Views { get; set; }
public Blog Blog { get; set; }
}
SQL Server has a limitation, where it doesn't allow aggregate functions invocations over other aggregate functions, or over subqueries; it errors with: "Cannot perform an aggregate function on an expression containing an aggregate or a subquery".
Examples:
The invalid SQL looks like this:
We should be able to have a post-processing step that lifts the subquery out of the projection, and integrates it into the SELECT expression as a OUTER APPLY (or CROSS JOIN, if uncorrelated):
Note that this affects non-GroupBy queries as well.
Full code