-
Notifications
You must be signed in to change notification settings - Fork 4.8k
Queue<T>.Enqueue and Stack<T>.Push performance improvements #36502
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -33,7 +33,6 @@ public class Queue<T> : IEnumerable<T>, | |
| private int _version; | ||
|
|
||
| private const int MinimumGrow = 4; | ||
| private const int GrowFactor = 200; // double each time | ||
|
|
||
| // Creates a queue with room for capacity objects. The default initial | ||
| // capacity and grow factor are used. | ||
|
|
@@ -183,15 +182,31 @@ void ICollection.CopyTo(Array array, int index) | |
| // Adds item to the tail of the queue. | ||
| public void Enqueue(T item) | ||
| { | ||
| if (_size == _array.Length) | ||
| int tail = _tail; | ||
| int size = _size; | ||
| T[] array = _array; | ||
|
|
||
| if ((uint)size < (uint)array.Length && (uint)tail < (uint)array.Length) | ||
| { | ||
| int newcapacity = (int)((long)_array.Length * (long)GrowFactor / 100); | ||
| if (newcapacity < _array.Length + MinimumGrow) | ||
| { | ||
| newcapacity = _array.Length + MinimumGrow; | ||
| } | ||
| SetCapacity(newcapacity); | ||
| array[tail] = item; | ||
| MoveNext(ref tail, array); | ||
| _tail = tail; | ||
| _size = size + 1; | ||
| _version++; | ||
| } | ||
| else | ||
| { | ||
| EnqueueWithResize(item); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What's the benefit to separating this out into a non-inlineable method? Is Enqueue inlined with this separated out? Does separating it out make the JIT better at optimizing the rest of the code?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
When
No. At least not now, as When written as public void Enqueue(T item)
{
int tail = _tail;
int size = _size;
T[] array = _array;
if ((uint)size < (uint)array.Length && (uint)tail < (uint)array.Length)
{
array[tail] = item;
_tail = MoveNext(ref tail, array);
_size = size + 1;
_version++;
}
else
{
int length = array.Length;
int newCapaicty = Math.Max(length * 2, length + MinimumGrow);
SetCapacity(newCapaicty);
_array[_tail] = item;
MoveNext(ref _tail);
_size++;
_version++;
}
}(so just manually inlined G_M9246_IG01:
55 push rbp
4156 push r14
53 push rbx
488D6C2410 lea rbp, [rsp+10H]
488BDF mov rbx, rdi
448BF6 mov r14d, esi
G_M9246_IG02:
8B7B14 mov edi, dword ptr [rbx+20]
8B7318 mov esi, dword ptr [rbx+24]
488B4308 mov rax, gword ptr [rbx+8]
8B5008 mov edx, dword ptr [rax+8]
3BD6 cmp edx, esi
7624 jbe SHORT G_M9246_IG05
3BD7 cmp edx, edi
7620 jbe SHORT G_M9246_IG05
4863CF movsxd rcx, edi
4489748810 mov dword ptr [rax+4*rcx+16], r14d
FFC7 inc edi
3BD7 cmp edx, edi
7502 jne SHORT G_M9246_IG03
33FF xor edi, edi
G_M9246_IG03:
897B14 mov dword ptr [rbx+20], edi
FFC6 inc esi
897318 mov dword ptr [rbx+24], esi
FF431C inc dword ptr [rbx+28]
G_M9246_IG04:
5B pop rbx
415E pop r14
5D pop rbp
C3 ret
G_M9246_IG05:
8BF2 mov esi, edx
D1E6 shl esi, 1
83C204 add edx, 4
3BF2 cmp esi, edx
7D02 jge SHORT G_M9246_IG06
EB02 jmp SHORT G_M9246_IG07
G_M9246_IG06:
8BD6 mov edx, esi
G_M9246_IG07:
488BFB mov rdi, rbx
8BF2 mov esi, edx
E8D3F1FFFF call Queue`1:SetCapacity(int):this
488B4308 mov rax, gword ptr [rbx+8]
8B7B14 mov edi, dword ptr [rbx+20]
8B7008 mov esi, dword ptr [rax+8]
3BFE cmp edi, esi
7323 jae SHORT G_M9246_IG10
4863FF movsxd rdi, edi
448974B810 mov dword ptr [rax+4*rdi+16], r14d
488D4314 lea rax, bword ptr [rbx+20]
8B38 mov edi, dword ptr [rax]
FFC7 inc edi
3BF7 cmp esi, edi
7502 jne SHORT G_M9246_IG08
33FF xor edi, edi
G_M9246_IG08:
8938 mov dword ptr [rax], edi
FF4318 inc dword ptr [rbx+24]
FF431C inc dword ptr [rbx+28]
G_M9246_IG09:
5B pop rbx
415E pop r14
5D pop rbp
C3 ret
G_M9246_IG10:
E80DD9B678 call CORINFO_HELP_RNGCHKFAIL
CC int3 So there is a "fast-return" at Note: if G_M9246_IG02:
FF431C inc dword ptr [rbx+28] ; _version++
8B7B14 mov edi, dword ptr [rbx+20]
8B7318 mov esi, dword ptr [rbx+24]
488B4308 mov rax, gword ptr [rbx+8]
8B5008 mov edx, dword ptr [rax+8]
3BD6 cmp edx, esi
7621 jbe SHORT G_M9246_IG05
; ...
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I cheated a bit. In the prolog and epilog there's a difference: G_M9243_IG01:
55 push rbp
- 488BEC mov rbp, rsp
- 90 nop
+ 4156 push r14
+ 53 push rbx
+ 488D6C2410 lea rbp, [rsp+10H]
+ 488BDF mov rbx, rdi
+ 448BF6 mov r14d, esi
; ...
G_M9246_IG04:
+ 5B pop rbx
+ 415E pop r14
5D pop rbp
C3 ret
; ...Don't know if this makes much difference -- one may need to measure it, but I assume it's within noise.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is also followed from Perhaps I was misunderstanding the purpose behind that in the first place. Even without
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @Wraiyth you haven't missunderstood something. It a usual review-process to question and reason about every change -- even when the change is taken over from an accepted PR, so it's good to re-think about some decisions (made earlier, or made now). That's the case here. |
||
| } | ||
| } | ||
|
|
||
| [MethodImpl(MethodImplOptions.NoInlining)] | ||
| private void EnqueueWithResize(T item) | ||
| { | ||
| int length = _array.Length; | ||
| int newCapacity = Math.Max(length * 2, length + MinimumGrow); | ||
|
|
||
| SetCapacity(newCapacity); | ||
|
|
||
| _array[_tail] = item; | ||
| MoveNext(ref _tail); | ||
|
|
@@ -357,15 +372,16 @@ private void SetCapacity(int capacity) | |
| _tail = (_size == capacity) ? 0 : _size; | ||
| _version++; | ||
| } | ||
| private void MoveNext(ref int index) => MoveNext(ref index, _array); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: needs a blank line above this. That said, are there many other call sites to this? Why not just use _array at those call sites rather than adding this wrapper?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My idea was #36502 (comment) I see there are other places that could use this too.
By these |
||
|
|
||
| // Increments the index wrapping it if necessary. | ||
| private void MoveNext(ref int index) | ||
| private void MoveNext(ref int index, T[] array) | ||
| { | ||
| // It is tempting to use the remainder operator here but it is actually much slower | ||
| // than a simple comparison and a rarely taken branch. | ||
| // JIT produces better code than with ternary operator ?: | ||
| int tmp = index + 1; | ||
| if (tmp == _array.Length) | ||
| if (tmp == array.Length) | ||
| { | ||
| tmp = 0; | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
doesn't
(uint)tail < (uint)array.Lengthjust move the check to C# code?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Correct. The only mini-advantage is that the JITed code size is a few bytes smaller.
G_M9243_IG02: FF471C inc dword ptr [rdi+28] 8B4714 mov eax, dword ptr [rdi+20] 8B5718 mov edx, dword ptr [rdi+24] 488B4F08 mov rcx, gword ptr [rdi+8] 448B4108 mov r8d, dword ptr [rcx+8] 443BC2 cmp r8d, edx 7620 jbe SHORT G_M9243_IG05 - 443BC0 cmp r8d, eax - 761B jbe SHORT G_M9243_IG05 + 413BC0 cmp eax, r8d + 7323 jae SHORT G_M9243_IG07 4C63C8 movsxd r9, eax 4289748910 mov dword ptr [rcx+4*r9+16], esi FFC0 inc eax 443BC0 cmp r8d, eax 7502 jne SHORT G_M9243_IG03 33C0 xor eax, eax G_M9243_IG03: 894714 mov dword ptr [rdi+20], eax FFC2 inc edx 895718 mov dword ptr [rdi+24], edx G_M9243_IG04: 5D pop rbp C3 ret G_M9243_IG05: E8E0F4FFFF call Queue`1:EnqueueWithResize(int):this 90 nop G_M9243_IG06: 5D pop rbp C3 ret +G_M9243_IG07: + E8882CB778 call CORINFO_HELP_RNGCHKFAIL + CC int3 -; Total bytes of code 67, prolog size 5 for method Queue`1:Enqueue(int):this +; Total bytes of code 73, prolog size 5 for method Queue`1:Enqueue(int):this ; ============================================================I think this can be reverted, thus improving readability of the code.