-
Notifications
You must be signed in to change notification settings - Fork 2.3k
Start Channel Arbitrators concurrently #9262
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
ziggie1984
wants to merge
4
commits into
lightningnetwork:master
from
ziggie1984:fix-channel-arbitrator-deadlock
Closed
Changes from all commits
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,7 @@ | ||
| package contractcourt | ||
|
|
||
| import ( | ||
| "context" | ||
| "errors" | ||
| "fmt" | ||
| "sync" | ||
|
|
@@ -22,11 +23,21 @@ import ( | |
| "github.com/lightningnetwork/lnd/lnwallet" | ||
| "github.com/lightningnetwork/lnd/lnwallet/chainfee" | ||
| "github.com/lightningnetwork/lnd/lnwire" | ||
| "golang.org/x/sync/errgroup" | ||
| ) | ||
|
|
||
| // ErrChainArbExiting signals that the chain arbitrator is shutting down. | ||
| var ErrChainArbExiting = errors.New("ChainArbitrator exiting") | ||
|
|
||
| const ( | ||
| // DefaultChainArbTimeout is the default timeout for the chain | ||
| // arbitrator to start the channel arbitrators for each channel. This | ||
| // value must be long enough because starting arbitrators might depend | ||
| // on external components (e.g. aux components, hooks), which might | ||
| // require other lnd subsystems to be fully started. | ||
| DefaultChainArbTimeout = 5 * time.Minute | ||
| ) | ||
|
|
||
| // ResolutionMsg is a message sent by resolvers to outside sub-systems once an | ||
| // outgoing contract has been fully resolved. For multi-hop contracts, if we | ||
| // resolve the outgoing contract, we'll also need to ensure that the incoming | ||
|
|
@@ -218,6 +229,10 @@ type ChainArbitratorConfig struct { | |
| // lower package. | ||
| QueryIncomingCircuit func(circuit models.CircuitKey) *models.CircuitKey | ||
|
|
||
| // StartupTimeout is the maximum time the arbitrator will wait when | ||
| // starting the individual channel arbitrators. | ||
| StartupTimeout time.Duration | ||
|
|
||
| // AuxLeafStore is an optional store that can be used to store auxiliary | ||
| // leaves for certain custom channel types. | ||
| AuxLeafStore fn.Option[lnwallet.AuxLeafStore] | ||
|
|
@@ -272,6 +287,12 @@ type ChainArbitrator struct { | |
| func NewChainArbitrator(cfg ChainArbitratorConfig, | ||
| db *channeldb.DB) *ChainArbitrator { | ||
|
|
||
| // Let's use the default timeout for tests. For normal operation, the | ||
| // value is validated when parsing the configuration. | ||
| if cfg.StartupTimeout == 0 { | ||
| cfg.StartupTimeout = DefaultChainArbTimeout | ||
| } | ||
|
|
||
| return &ChainArbitrator{ | ||
| cfg: cfg, | ||
| activeChannels: make(map[wire.OutPoint]*ChannelArbitrator), | ||
|
|
@@ -767,20 +788,67 @@ func (c *ChainArbitrator) Start() error { | |
|
|
||
| // Launch all the goroutines for each arbitrator so they can carry out | ||
| // their duties. | ||
| // Set a timeout for the group so we don't wait indefinitely if one | ||
| // startup function blocks. | ||
| ctx, cancel := context.WithTimeout( | ||
| context.Background(), c.cfg.StartupTimeout, | ||
| ) | ||
|
|
||
| eg, egCtx := errgroup.WithContext(ctx) | ||
|
|
||
| for _, arbitrator := range c.activeChannels { | ||
| startState, ok := startStates[arbitrator.cfg.ChanPoint] | ||
| if !ok { | ||
| stopAndLog() | ||
|
|
||
| // In case we encounter an error we need to cancel the | ||
| // context to ensure all goroutines are cleaned up. | ||
| cancel() | ||
| return fmt.Errorf("arbitrator: %v has no start state", | ||
| arbitrator.cfg.ChanPoint) | ||
| } | ||
|
|
||
| if err := arbitrator.Start(startState); err != nil { | ||
| stopAndLog() | ||
| return err | ||
| } | ||
| arb := arbitrator // Create new variable for closure | ||
| eg.Go(func() error { | ||
| // Create a non-blocking goroutine for the actual Start | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we might also check the context in the beginning before starting arbitrator. |
||
| // call | ||
| startErrChan := make(chan error, 1) | ||
| go func() { | ||
| startErrChan <- arb.Start(startState) | ||
| }() | ||
|
|
||
| select { | ||
| case startErr := <-startErrChan: | ||
| if startErr != nil { | ||
| cancel() | ||
| } | ||
|
|
||
| return startErr | ||
|
|
||
| case <-egCtx.Done(): | ||
| return egCtx.Err() | ||
|
|
||
| case <-c.quit: | ||
|
|
||
| return ErrChainArbExiting | ||
| } | ||
| }) | ||
| } | ||
|
|
||
| // Wait for the error group to finish and if an error occurred we | ||
| // trigger a graceful shutdown of LND. | ||
| // | ||
| // NOTE: We do not add this collector to the waitGroup because we want | ||
| // to stop the chain arbitrator if there occurs an error. | ||
| go func() { | ||
| defer cancel() | ||
|
|
||
| if err := eg.Wait(); err != nil { | ||
| log.Criticalf("ChainArbitrator failed to start all "+ | ||
| "channel arbitrators: %v", err) | ||
| } | ||
| }() | ||
|
|
||
| // Subscribe to a single stream of block epoch notifications that we | ||
| // will dispatch to all active arbitrators. | ||
| blockEpoch, err := c.cfg.Notifier.RegisterBlockEpochNtfn(nil) | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: we don't actually clean up the goroutines yet (or I mean we don't interrupt the
arb.Start()yet, we just don't wait for it anymore). So can perhaps add a TODO here that we'll want to eventually add a context toStart()so things will clean up properly on cancel?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Now we clean them up, I think adding a context ist not really the way to go because looking into the startup functionality it does not look like a context might help here.