From b4d052a6b6dfecf076bd3eab5d5c46403dbd0bf1 Mon Sep 17 00:00:00 2001
From: Michael Gottesman
Date: Wed, 15 Jan 2020 14:21:27 -0800
Subject: [PATCH 1/2] [partial-apply-combiner] Fix handling of indirect parameters.

I do not believe that this code was ever correct; we just got lucky, since in
the vast majority of cases where this optimization kicks in we also eliminate
the underlying partial apply/allocations.

To make sure that we can properly test this code and do not run into this
problem again, I added an option to SILCombinerApplyVisitors that disables
partial apply deletion, so we can verify that we do not rely on eliminating
the underlying partial apply for correctness.

----

To be more specific, a partial_apply expects to take all of its input
arguments at +1. This includes parameters that are passed @in or
@in_guaranteed to the underlying function. Just to summarize, the expected
semantics are:

1. If the underlying function takes the parameter @in, then the partial_apply
   forwarder allows the underlying function to consume the argument.

2. If the underlying function takes the parameter @in_guaranteed, then the
   partial_apply forwarder itself will destroy the underlying value.

In terms of the partial_apply combiner, this means that unless we can
guarantee that the partial_apply is eliminated as a result of our
transformation (which we cannot, given the way this utility is written), we
must introduce new copies of @in and @in_guaranteed partially applied
arguments and lifetime-extend those copies to the new direct apply that we
are creating. In the case of an @in_guaranteed parameter, we insert a destroy
after the call site to ensure that our new copy is cleaned up. If we have an
@in parameter, then we allow the underlying function to clean up the value.
---
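
Illustrative note (not part of the commit itself): a minimal SIL sketch, using
hypothetical function names, of the shape the combiner must now produce for an
@in_guaranteed captured address when the partial_apply itself is not deleted.
The combiner-inserted temporary is what keeps the argument alive across the
rewritten apply, independently of what the surviving closure later does with
the value it still owns.

  sil @callee : $@convention(thin) (@in_guaranteed Builtin.NativeObject) -> ()

  sil @caller : $@convention(thin) (@in Builtin.NativeObject) -> () {
  bb0(%0 : $*Builtin.NativeObject):
    %f = function_ref @callee : $@convention(thin) (@in_guaranteed Builtin.NativeObject) -> ()
    // Combiner-inserted copy that lifetime-extends the captured value over
    // the rewritten apply below.
    %tmp = alloc_stack $Builtin.NativeObject
    copy_addr %0 to [initialization] %tmp : $*Builtin.NativeObject
    // The partial_apply survives (dead-closure elimination disabled by the
    // new option); it still takes the value in %0 at +1 and destroys it when
    // the closure is released.
    %pa = partial_apply [callee_guaranteed] %f(%0) : $@convention(thin) (@in_guaranteed Builtin.NativeObject) -> ()
    // The apply of the closure is rewritten into a direct apply of the callee
    // on the temporary instead of on %0.
    apply %f(%tmp) : $@convention(thin) (@in_guaranteed Builtin.NativeObject) -> ()
    strong_release %pa : $@callee_guaranteed () -> ()
    // @in_guaranteed: the inserted copy must be destroyed after the call.
    destroy_addr %tmp : $*Builtin.NativeObject
    dealloc_stack %tmp : $*Builtin.NativeObject
    %t = tuple ()
    return %t : $()
  }

For loadable types the combiner emits this destroy as a load [take] followed
by a strong_release, which is the pattern the new test's CHECK lines verify.
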
 .../SILCombiner/SILCombinerApplyVisitors.cpp  | 14 ++-
 .../Utils/PartialApplyCombiner.cpp            | 61 +++++++-----
 test/SILOptimizer/partial_apply_combiner.sil  | 92 +++++++++++++++++++
 test/SILOptimizer/sil_combine.sil             | 11 +--
 test/SILOptimizer/sil_combine_apply.sil       |  1 -
 5 files changed, 144 insertions(+), 35 deletions(-)
 create mode 100644 test/SILOptimizer/partial_apply_combiner.sil

diff --git a/lib/SILOptimizer/SILCombiner/SILCombinerApplyVisitors.cpp b/lib/SILOptimizer/SILCombiner/SILCombinerApplyVisitors.cpp
index 3aac12abe25d9..ec791c106a0af 100644
--- a/lib/SILOptimizer/SILCombiner/SILCombinerApplyVisitors.cpp
+++ b/lib/SILOptimizer/SILCombiner/SILCombinerApplyVisitors.cpp
@@ -14,9 +14,9 @@
 #include "SILCombiner.h"
 #include "swift/AST/GenericSignature.h"
 #include "swift/AST/Module.h"
+#include "swift/AST/SemanticAttrs.h"
 #include "swift/AST/SubstitutionMap.h"
 #include "swift/Basic/Range.h"
-#include "swift/AST/SemanticAttrs.h"
 #include "swift/SIL/DebugUtils.h"
 #include "swift/SIL/DynamicCasts.h"
 #include "swift/SIL/InstructionUtils.h"
@@ -33,12 +33,17 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"

 using namespace swift;
 using namespace swift::PatternMatch;

 STATISTIC(NumOptimizedKeypaths, "Number of optimized keypath instructions");

+static llvm::cl::opt<bool>
+    DisableDeletingDeadClosures("sil-combine-disable-dead-closure-elim",
+                                llvm::cl::init(false));
+
 /// Remove pointless reabstraction thunk closures.
 ///   partial_apply %reabstraction_thunk_typeAtoB(
 ///      partial_apply %reabstraction_thunk_typeBtoA %closure_typeB))
@@ -106,8 +111,11 @@ SILInstruction *SILCombiner::visitPartialApplyInst(PartialApplyInst *PAI) {

   tryOptimizeApplyOfPartialApply(PAI, Builder, getInstModCallbacks());

-  // Try to delete dead closures.
-  tryDeleteDeadClosure(PAI, getInstModCallbacks());
+  // Try to delete dead closures unless we are testing and we were asked to not
+  // do so.
+  if (!DisableDeletingDeadClosures) {
+    tryDeleteDeadClosure(PAI, getInstModCallbacks());
+  }

   return nullptr;
 }
diff --git a/lib/SILOptimizer/Utils/PartialApplyCombiner.cpp b/lib/SILOptimizer/Utils/PartialApplyCombiner.cpp
index 76c1ae4ef779e..289737c30e877 100644
--- a/lib/SILOptimizer/Utils/PartialApplyCombiner.cpp
+++ b/lib/SILOptimizer/Utils/PartialApplyCombiner.cpp
@@ -29,7 +29,7 @@ class PartialApplyCombiner {

   // Temporaries created as copies of alloc_stack arguments of
   // the partial_apply.
-  SmallVector<SILValue, 8> tmpCopies;
+  SmallVector<std::pair<SILValue, bool>, 8> tmpCopies;

   // Mapping from the original argument of partial_apply to
   // the temporary containing its copy.
@@ -86,20 +86,25 @@ bool PartialApplyCombiner::allocateTemporaries() {
   auto argList = pai->getArguments();
   paramList = paramList.drop_front(paramList.size() - argList.size());

-  llvm::SmallVector<std::pair<SILValue, unsigned>, 8> argsToHandle;
+  struct ArgState {
+    SILValue value;
+    unsigned index;
+    bool needsDestroy;
+
+    ArgState(SILValue value, unsigned index, bool needsDestroy)
+        : value(value), index(index), needsDestroy(needsDestroy) {}
+  };
+  SmallVector<ArgState, 8> argsToHandle;
   for (unsigned i : indices(argList)) {
     SILValue arg = argList[i];
     SILParameterInfo param = paramList[i];
+
     if (param.isIndirectMutating())
       continue;

-    // Create a temporary and copy the argument into it, if:
-    // - the argument stems from an alloc_stack
-    // - the argument is consumed by the callee and is indirect
-    //   (e.g. it is an @in argument)
-    if (isa<AllocStackInst>(arg) ||
-        (param.isConsumed() &&
-         pai->getSubstCalleeConv().isSILIndirect(param))) {
+    // Always create a temporary and copy the argument into it if we have an
+    // indirect argument.
+    if (pai->getSubstCalleeConv().isSILIndirect(param)) {
       // If the argument has a dependent type, then we can not create a
       // temporary for it at the beginning of the function, so we must bail.
       //
@@ -108,10 +113,13 @@ bool PartialApplyCombiner::allocateTemporaries() {
       if (arg->getType().hasOpenedExistential())
         return false;

-      // If the temporary is non-trivial, we need to destroy it later.
-      if (!arg->getType().isTrivial(*pai->getFunction()))
+      bool argNeedsDestroy = false;
+      if (!param.isConsumed() &&
+          !arg->getType().isTrivial(*pai->getFunction())) {
+        argNeedsDestroy = true;
         needsDestroys = true;
-      argsToHandle.push_back(std::make_pair(arg, i));
+      }
+      argsToHandle.emplace_back(arg, i, argNeedsDestroy /*needs destroy*/);
     }
   }

@@ -125,20 +133,21 @@ bool PartialApplyCombiner::allocateTemporaries() {
     return false;
   }

-  for (auto argWithIdx : argsToHandle) {
-    SILValue Arg = argWithIdx.first;
+  for (auto argState : argsToHandle) {
+    SILValue Arg = argState.value;
     builder.setInsertionPoint(pai->getFunction()->begin()->begin());
     // Create a new temporary at the beginning of a function.
-    SILDebugVariable dbgVar(/*Constant*/ true, argWithIdx.second);
+    SILDebugVariable dbgVar(/*Constant*/ true, argState.index);
     auto *tmp = builder.createAllocStack(pai->getLoc(), Arg->getType(), dbgVar);
     builder.setInsertionPoint(pai);
     // Copy argument into this temporary.
     builder.createCopyAddr(pai->getLoc(), Arg, tmp, IsTake_t::IsNotTake,
                            IsInitialization_t::IsInitialization);
-    tmpCopies.push_back(tmp);
+    tmpCopies.emplace_back(tmp, argState.needsDestroy);
     argToTmpCopy.insert(std::make_pair(Arg, tmp));
   }
+
   return true;
 }
@@ -152,22 +161,26 @@ void PartialApplyCombiner::deallocateTemporaries() {

     for (auto copy : tmpCopies) {
       builder.setInsertionPoint(term);
-      builder.createDeallocStack(pai->getLoc(), copy);
+      builder.createDeallocStack(pai->getLoc(), copy.first);
     }
   }
 }

 /// Emit code to release/destroy temporaries.
 void PartialApplyCombiner::destroyTemporaries() {
-  // Insert releases and destroy_addrs as early as possible,
-  // because we don't want to keep objects alive longer than
-  // its really needed.
-  for (auto op : tmpCopies) {
+  // Insert releases and destroy_addrs as early as possible, because we don't
+  // want to keep objects alive longer than really needed.
+  for (auto pair : tmpCopies) {
+    bool needsDestroy = pair.second;
+    if (!needsDestroy)
+      continue;
+    auto op = pair.first;
     auto tmpType = op->getType().getObjectType();
     if (tmpType.isTrivial(*pai->getFunction()))
       continue;
     for (auto *endPoint : partialApplyFrontier) {
-      builder.setInsertionPoint(endPoint);
+      SILBuilderWithScope builder(endPoint);
+
       if (!tmpType.isAddressOnly(*pai->getFunction())) {
         SILValue load = builder.emitLoadValueOperation(
             pai->getLoc(), op, LoadOwnershipQualifier::Take);
@@ -179,8 +192,7 @@ void PartialApplyCombiner::destroyTemporaries() {
   }
 }

-/// Process an apply instruction which uses a partial_apply
-/// as its callee.
+/// Process an apply instruction which uses a partial_apply as its callee.
 /// Returns true on success.
 bool PartialApplyCombiner::processSingleApply(FullApplySite paiAI) {
   builder.setInsertionPoint(paiAI.getInstruction());
@@ -232,6 +244,7 @@ bool PartialApplyCombiner::processSingleApply(FullApplySite paiAI) {
       arg = builder.emitCopyValueOperation(pai->getLoc(), arg);
       // For non consumed parameters (e.g. guaranteed), we also need to
       // insert destroys after each apply instruction that we create.
+
       if (!paramInfo[paramInfo.size() - partialApplyArgs.size() + i]
               .isConsumed())
         toBeDestroyedArgs.push_back(arg);
diff --git a/test/SILOptimizer/partial_apply_combiner.sil b/test/SILOptimizer/partial_apply_combiner.sil
new file mode 100644
index 0000000000000..fcf2fd6a4945f
--- /dev/null
+++ b/test/SILOptimizer/partial_apply_combiner.sil
@@ -0,0 +1,92 @@
+// RUN: %target-sil-opt -enable-objc-interop -sil-combine-disable-dead-closure-elim=true -enable-sil-verify-all %s -sil-combine | %FileCheck %s
+
+sil_stage canonical
+
+import Builtin
+
+sil @in_argument_func : $@convention(thin) (@in Builtin.NativeObject) -> ()
+sil @inguaranteed_argument_func : $@convention(thin) (@in_guaranteed Builtin.NativeObject) -> ()
+
+// CHECK-LABEL: sil @test_in_argument_func : $@convention(thin) (@in Builtin.NativeObject) -> () {
+// CHECK: bb0([[ARG:%.*]] : $*Builtin.NativeObject):
+// CHECK:   [[ALLOC:%.*]] = alloc_stack $Builtin.NativeObject
+// CHECK:   copy_addr [[ARG]] to [initialization] [[ALLOC]]
+// CHECK-NEXT: [[PAI:%.*]] = partial_apply [callee_guaranteed] {{%.*}}([[ARG]])
+// CHECK-NEXT: apply {{%.*}}([[ALLOC]])
+// CHECK-NEXT: strong_release [[PAI]]
+// CHECK-NOT: strong_release
+// CHECK: } // end sil function 'test_in_argument_func'
+sil @test_in_argument_func : $@convention(thin) (@in Builtin.NativeObject) -> () {
+bb0(%0 : $*Builtin.NativeObject):
+  %f = function_ref @in_argument_func : $@convention(thin) (@in Builtin.NativeObject) -> ()
+  %1 = partial_apply [callee_guaranteed] %f(%0) : $@convention(thin) (@in Builtin.NativeObject) -> ()
+  apply %1() : $@callee_guaranteed () -> ()
+  strong_release %1 : $@callee_guaranteed () -> ()
+  %9999 = tuple()
+  return %9999 : $()
+}
+
+// CHECK-LABEL: sil @test_in_guaranteed_argument_func : $@convention(thin) (@in Builtin.NativeObject) -> () {
+// CHECK: bb0([[ARG:%.*]] :
+// CHECK:   [[ASI:%.*]] = alloc_stack
+// CHECK:   copy_addr [[ARG]] to [initialization] [[ASI]]
+// CHECK:   [[PAI:%.*]] = partial_apply [callee_guaranteed] {{%.*}}([[ARG]])
+// CHECK:   apply {{%.*}}([[ASI]])
+// CHECK:   strong_release [[PAI]]
+// CHECK:   [[RELOAD_VALUE:%.*]] = load [[ASI]]
+// CHECK:   strong_release [[RELOAD_VALUE]]
+// CHECK: } // end sil function 'test_in_guaranteed_argument_func'
+sil @test_in_guaranteed_argument_func : $@convention(thin) (@in Builtin.NativeObject) -> () {
+bb0(%0 : $*Builtin.NativeObject):
+  %f = function_ref @inguaranteed_argument_func : $@convention(thin) (@in_guaranteed Builtin.NativeObject) -> ()
+  %1 = partial_apply [callee_guaranteed] %f(%0) : $@convention(thin) (@in_guaranteed Builtin.NativeObject) -> ()
+  apply %1() : $@callee_guaranteed () -> ()
+  strong_release %1 : $@callee_guaranteed () -> ()
+  %9999 = tuple()
+  return %9999 : $()
+}
+
+// CHECK-LABEL: sil @test_in_argument_func_allocstack : $@convention(thin) (@in Builtin.NativeObject) -> () {
+// CHECK: bb0([[ARG:%.*]] :
+// CHECK:   [[STACK_1:%.*]] = alloc_stack $Builtin.NativeObject
+// CHECK:   [[STACK_2:%.*]] = alloc_stack $Builtin.NativeObject
+// CHECK:   copy_addr [take] [[ARG]] to [initialization] [[STACK_2]]
+// CHECK:   copy_addr [[STACK_2]] to [initialization] [[STACK_1]]
+// CHECK-NEXT: [[PAI:%.*]] = partial_apply [callee_guaranteed] {{%.*}}([[STACK_2]])
+// CHECK-NEXT: apply {{%.*}}([[STACK_1]])
+// CHECK-NEXT: strong_release [[PAI]]
+// CHECK-NEXT: dealloc_stack [[STACK_2]]
+// CHECK-NEXT: tuple
+// CHECK-NEXT: dealloc_stack [[STACK_1]]
+// CHECK: } // end sil function 'test_in_argument_func_allocstack'
+sil @test_in_argument_func_allocstack : $@convention(thin) (@in Builtin.NativeObject) -> () {
+bb0(%0 : $*Builtin.NativeObject):
+  %alloc = alloc_stack $Builtin.NativeObject
+  copy_addr [take] %0 to [initialization] %alloc : $*Builtin.NativeObject
+
+  %f = function_ref @in_argument_func : $@convention(thin) (@in Builtin.NativeObject) -> ()
+  %1 = partial_apply [callee_guaranteed] %f(%alloc) : $@convention(thin) (@in Builtin.NativeObject) -> ()
+  apply %1() : $@callee_guaranteed () -> ()
+  strong_release %1 : $@callee_guaranteed () -> ()
+  dealloc_stack %alloc : $*Builtin.NativeObject
+
+  %9999 = tuple()
+  return %9999 : $()
+}
+
+// CHECK-LABEL: sil @test_in_guaranteed_argument_func_allocstack : $@convention(thin) (@in Builtin.NativeObject) -> () {
+sil @test_in_guaranteed_argument_func_allocstack : $@convention(thin) (@in Builtin.NativeObject) -> () {
+bb0(%0 : $*Builtin.NativeObject):
+  %alloc = alloc_stack $Builtin.NativeObject
+  copy_addr [take] %0 to [initialization] %alloc : $*Builtin.NativeObject
+
+  %f = function_ref @inguaranteed_argument_func : $@convention(thin) (@in_guaranteed Builtin.NativeObject) -> ()
+  %1 = partial_apply [callee_guaranteed] %f(%alloc) : $@convention(thin) (@in_guaranteed Builtin.NativeObject) -> ()
+  apply %1() : $@callee_guaranteed () -> ()
+  strong_release %1 : $@callee_guaranteed () -> ()
+
+  dealloc_stack %alloc : $*Builtin.NativeObject
+
+  %9999 = tuple()
+  return %9999 : $()
+}
diff --git a/test/SILOptimizer/sil_combine.sil b/test/SILOptimizer/sil_combine.sil
index 34b01f4113b5f..ac34a6a0a2c1c 100644
--- a/test/SILOptimizer/sil_combine.sil
+++ b/test/SILOptimizer/sil_combine.sil
@@ -2955,9 +2955,8 @@ sil @closure_with_in_guaranteed_owned_in_args : $@convention(method) (@in CC2, @
 // CHECK: apply [[CLOSURE]]({{.*}}, [[TMP]])
 // Check that the peephole inserted a release the guaranteed argument
 // CHECK: strong_release %{{[0-9]+}} : $CC1
-// Release the @owned CC4 argument of the function
-// CHECK: load {{%[0-9]+}} : $*CC4
-// CHECK: strong_release {{%[0-9]+}} : $CC4
+// Do not release the @owned CC4 argument of the function
+// CHECK-NOT: load {{%[0-9]+}} : $*CC4
 // CHECK: br bb3

 // CHECK: bb2:
@@ -2969,9 +2968,8 @@ sil @closure_with_in_guaranteed_owned_in_args : $@convention(method) (@in CC2, @
 // CHECK: apply [[CLOSURE]]({{.*}}, [[TMP]])
 // Check that the peephole inserted a release the closure's guaranteed argument
 // CHECK: strong_release %{{[0-9]+}} : $CC1
-// Release the @owned CC4 argument of the function
-// CHECK: load {{%[0-9]+}} : $*CC4
-// CHECK: strong_release {{%[0-9]+}} : $CC4
+// Make sure we do not reload/release the @owned CC4 argument of the function
+// CHECK-NOT: load {{%[0-9]+}} : $*CC4
 // CHECK: br bb3

 // bb3:
@@ -3089,7 +3087,6 @@ bb2:
 // CHECK-NEXT: copy_addr [[ARG1]] to [initialization] [[TMP]] : $*T
 // CHECK-NEXT: destroy_addr [[ARG1]]
 // CHECK-NEXT: apply [[FN]]([[ARG0]], [[TMP]])
-// CHECK-NEXT: destroy_addr [[TMP]]
 // CHECK-NEXT: tuple
 // CHECK-NEXT: dealloc_stack [[TMP]]
 // CHECK-NEXT: return
diff --git a/test/SILOptimizer/sil_combine_apply.sil b/test/SILOptimizer/sil_combine_apply.sil
index c387831d63ec8..60cd4ce9862e8 100644
--- a/test/SILOptimizer/sil_combine_apply.sil
+++ b/test/SILOptimizer/sil_combine_apply.sil
@@ -276,7 +276,6 @@ bb0:
 // CHECK-NEXT: copy_addr [take] [[ARG1]] to [initialization] [[PA_TMP]]
 // CHECK-NEXT: apply [[FN]]([[ARG0]], [[APPLY_TMP]])
 // CHECK-NEXT: destroy_addr [[PA_TMP]]
-// CHECK-NEXT: destroy_addr [[APPLY_TMP]]
 // CHECK-NEXT: tuple
 // CHECK-NEXT: dealloc_stack [[APPLY_TMP]]
 // CHECK-NEXT: dealloc_stack [[PA_TMP]]

From 1b807b886ce5cba8ed0e399f6a2e27efd5b4049e Mon Sep 17 00:00:00 2001
From: Michael Gottesman
Date: Wed, 22 Jan 2020 14:10:57 -0800
Subject: [PATCH 2/2] [sil.rst] Clarify documentation around partial_apply
 closure contexts/closed over parameters.

---
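
Illustrative note (not part of the commit itself): a minimal SIL sketch, with
hypothetical function names, of item 1 of the rewritten documentation below. A
``partial_apply [on_stack]`` takes no ownership of its captures, so both the
captured value's lifetime and the stack context's lifetime remain the caller's
responsibility.

  sil @use_value : $@convention(thin) (@guaranteed Builtin.NativeObject) -> ()

  sil @on_stack_capture : $@convention(thin) (@owned Builtin.NativeObject) -> () {
  bb0(%0 : $Builtin.NativeObject):
    %f = function_ref @use_value : $@convention(thin) (@guaranteed Builtin.NativeObject) -> ()
    // The on_stack closure context does not own %0; the caller still does.
    %pa = partial_apply [callee_guaranteed] [on_stack] %f(%0) : $@convention(thin) (@guaranteed Builtin.NativeObject) -> ()
    apply %pa() : $@noescape @callee_guaranteed () -> ()
    // The stack context must be explicitly deallocated ...
    dealloc_stack %pa : $@noescape @callee_guaranteed () -> ()
    // ... and the captured value must still be destroyed by the caller.
    strong_release %0 : $Builtin.NativeObject
    %t = tuple ()
    return %t : $()
  }
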
 docs/SIL.rst | 90 +++++++++++++++++++++++++++++-----------------------
 1 file changed, 50 insertions(+), 40 deletions(-)

diff --git a/docs/SIL.rst b/docs/SIL.rst
index 4358e2ec12f3a..256b14beb74f0 100644
--- a/docs/SIL.rst
+++ b/docs/SIL.rst
@@ -3495,46 +3495,8 @@ partial_apply
   // %r will be of the substituted thick function type $(Z'...) -> R'

 Creates a closure by partially applying the function ``%0`` to a partial
-sequence of its arguments. In the instruction syntax, the type of the callee is
-specified after the argument list; the types of the argument and of the defined
-value are derived from the function type of the callee. If the ``partial_apply``
-has an escaping function type (not ``[on_stack]``) the closure context will be
-allocated with retain count 1 and initialized to contain the values ``%1``,
-``%2``, etc. The closed-over values will not be retained; that must be done
-separately before the ``partial_apply``. The closure does however take ownership
-of the partially applied arguments (except for ``@inout_aliasable`` parameters);
-when the closure reference count reaches zero, the contained values will be
-destroyed. If the ``partial_apply`` has a ``@noescape`` function type
-(``partial_apply [on_stack]``) the closure context is allocated on the stack and
-initialized to contain the closed-over values. The closed-over values are not
-retained, lifetime of the closed-over values must be managed separately. The
-lifetime of the stack context of a ``partial_apply [on_stack]`` must be
-terminated with a ``dealloc_stack``.
-
-If the callee is generic, all of its generic parameters must be bound by the
-given substitution list. The arguments are given with these generic
-substitutions applied, and the resulting closure is of concrete function
-type with the given substitutions applied. The generic parameters themselves
-cannot be partially applied; all of them must be bound. The result is always
-a concrete function.
-
-If an address argument has ``@inout_aliasable`` convention, the closure
-obtained from ``partial_apply`` will not own its underlying value.
-The ``@inout_aliasable`` parameter convention is used when a ``@noescape``
-closure captures an ``inout`` argument.
-
-TODO: The instruction, when applied to a generic function,
-currently implicitly performs abstraction difference transformations enabled
-by the given substitutions, such as promoting address-only arguments and returns
-to register arguments. This should be fixed.
-
-By default, ``partial_apply`` creates a closure whose invocation takes ownership
-of the context, meaning that a call implicitly releases the closure. The
-``[callee_guaranteed]`` change this to a caller-guaranteed model, where the
-caller promises not to release the closure while the function is being called.
-
-This instruction is used to implement both curry thunks and closures. A
-curried function in Swift::
+sequence of its arguments. This instruction is used to implement both curry
+thunks and closures. A curried function in Swift::

    func foo(_ a:A)(b:B)(c:C)(d:D) -> E { /* body of foo */ }

@@ -3607,6 +3569,54 @@ lowers to an uncurried entry point and is curried in the enclosing function::
     return %ret : $Int
   }
+**Ownership Semantics of Closure Context during Invocation**: By default, an
+escaping ``partial_apply`` (``partial_apply`` without ``[on_stack]``) creates a
+closure whose invocation takes ownership of the context, meaning that a call
+implicitly releases the closure. If the ``partial_apply`` is marked with the
+flag ``[callee_guaranteed]``, the invocation instead uses a caller-guaranteed
+model, where the caller promises not to release the closure while the function
+is being called.
+
+**Captured Value Ownership Semantics**: In the instruction syntax, the type of
+the callee is specified after the argument list; the types of the argument and
+of the defined value are derived from the function type of the callee. Even so,
+the ownership requirements of the partial apply are not the same as those of the
+callee function (and thus said signature). Instead:
+
+1. If the ``partial_apply`` has a ``@noescape`` function type (``partial_apply
+   [on_stack]``) the closure context is allocated on the stack and is
+   initialized to contain the closed-over values without taking ownership of
+   those values. The closed-over values are not retained and the lifetime of the
+   closed-over values must be managed by other instructions independently of the
+   ``partial_apply``. The lifetime of the stack context of a ``partial_apply
+   [on_stack]`` must be terminated with a ``dealloc_stack``.
+
+2. If the ``partial_apply`` has an escaping function type (not ``[on_stack]``)
+   then the closure context will be heap allocated with a retain count of 1. Any
+   closed-over parameters (except for ``@inout`` parameters) will be consumed by
+   the partial_apply. This ensures that no matter when the ``partial_apply`` is
+   called, the captured arguments are alive. When the closure context's
+   reference count reaches zero, the contained values are destroyed. If the
+   callee requires an owned parameter, then the implicit partial_apply forwarder
+   created by IRGen will copy the underlying argument and pass it to the callee.
+
+3. If an address argument has ``@inout_aliasable`` convention, the closure
+   obtained from ``partial_apply`` will not own its underlying value. The
+   ``@inout_aliasable`` parameter convention is used when a ``@noescape``
+   closure captures an ``inout`` argument.
+
+**NOTE:** If the callee is generic, all of its generic parameters must be bound
+by the given substitution list. The arguments are given with these generic
+substitutions applied, and the resulting closure is of concrete function type
+with the given substitutions applied. The generic parameters themselves cannot
+be partially applied; all of them must be bound. The result is always a concrete
+function.
+
+**TODO:** The instruction, when applied to a generic function, currently
+implicitly performs abstraction difference transformations enabled by the given
+substitutions, such as promoting address-only arguments and returns to register
+arguments. This should be fixed.
+
 builtin
 ```````

 ::
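
Illustrative note (appended for clarity; not part of the diff): a minimal SIL
sketch, with hypothetical function names, of the two context-ownership models
described in the new text above. With the default (owned) convention the apply
consumes the closure context; with ``[callee_guaranteed]`` the caller keeps
ownership and must release the closure itself.

  sil @callee : $@convention(thin) (@owned Builtin.NativeObject) -> ()

  sil @invoke_owned_context : $@convention(thin) (@owned Builtin.NativeObject) -> () {
  bb0(%0 : $Builtin.NativeObject):
    %f = function_ref @callee : $@convention(thin) (@owned Builtin.NativeObject) -> ()
    // The escaping partial_apply consumes %0 (captured-value rule 2 above).
    %pa = partial_apply %f(%0) : $@convention(thin) (@owned Builtin.NativeObject) -> ()
    // Default convention: this apply consumes the context, so no release of
    // %pa is needed afterwards.
    apply %pa() : $@callee_owned () -> ()
    %t = tuple ()
    return %t : $()
  }

  sil @invoke_guaranteed_context : $@convention(thin) (@owned Builtin.NativeObject) -> () {
  bb0(%0 : $Builtin.NativeObject):
    %f = function_ref @callee : $@convention(thin) (@owned Builtin.NativeObject) -> ()
    %pa = partial_apply [callee_guaranteed] %f(%0) : $@convention(thin) (@owned Builtin.NativeObject) -> ()
    // [callee_guaranteed]: the caller guarantees the context stays alive for
    // the duration of the call and releases it afterwards.
    apply %pa() : $@callee_guaranteed () -> ()
    strong_release %pa : $@callee_guaranteed () -> ()
    %t = tuple ()
    return %t : $()
  }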