53 changes: 19 additions & 34 deletions .github/workflows/preflight.yml
@@ -12,14 +12,25 @@ on:

jobs:
preflight-tests:
name: "preflight-tests (${{ matrix.group }})"
if: ${{ github.repository == 'superfly/flyctl' }}
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
parallelism: [20]
index:
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
group:
- apps
- deploy
- launch
- scale
- volume
- console
- logs
- machine
- postgres
- tokens
- wireguard
- misc
steps:
- uses: actions/checkout@v6
- uses: actions/setup-go@v6
@@ -32,14 +43,6 @@ jobs:
- name: Set FLY_PREFLIGHT_TEST_APP_PREFIX
run: |
echo "FLY_PREFLIGHT_TEST_APP_PREFIX=gha-$GITHUB_RUN_ID-$GITHUB_RUN_ATTEMPT" >> "$GITHUB_ENV"
- name: Generate go test slice
id: test_split
uses: hashicorp-forge/go-test-split-action@v1
with:
total: ${{ matrix.parallelism }}
index: ${{ matrix.index }}
packages: ./test/preflight/...
flags: --tags=integration
# If this workflow is triggered by code changes (eg PRs), download the binary to save time.
- uses: actions/download-artifact@v6
id: download-flyctl
@@ -53,37 +56,19 @@
- name: Run preflight tests
id: preflight
env:
FLY_PREFLIGHT_TEST_ACCESS_TOKEN: ${{ secrets.FLYCTL_PREFLIGHT_CI_FLY_API_TOKEN }}
# Use user token if available (required for deploy token tests), otherwise fall back to limited token
FLY_PREFLIGHT_TEST_ACCESS_TOKEN: ${{ secrets.FLYCTL_PREFLIGHT_CI_USER_TOKEN || secrets.FLYCTL_PREFLIGHT_CI_FLY_API_TOKEN }}
FLY_PREFLIGHT_TEST_FLY_ORG: flyctl-ci-preflight
FLY_PREFLIGHT_TEST_FLY_REGIONS: ${{ inputs.region }}
FLY_PREFLIGHT_TEST_NO_PRINT_HISTORY_ON_FAIL: 'true'
FLY_FORCE_TRACE: 'true'
run: |
mkdir -p bin
if [ -e master-build/flyctl ]; then
mv master-build/flyctl bin/flyctl
fi
if [ -e bin/flyctl ]; then
chmod +x bin/flyctl
fi
(test -e master-build/flyctl) && mv master-build/flyctl bin/flyctl
chmod +x bin/flyctl
export PATH=$PWD/bin:$PATH
test_opts=""
if [[ "${{ github.ref }}" != "refs/heads/master" ]]; then
test_opts="-short"
fi
test_log="$(mktemp)"
function finish {
rm "$test_log"
}
trap finish EXIT
set +e
go test ./test/preflight/... --tags=integration -v -timeout=15m $test_opts -run "${{ steps.test_split.outputs.run }}" | tee "$test_log"
test_status=$?
set -e
echo -n failed= >> $GITHUB_OUTPUT
awk '/^--- FAIL:/{ printf("%s ", $3) }' "$test_log" >> $GITHUB_OUTPUT
echo >> $GITHUB_OUTPUT
exit $test_status
./scripts/preflight.sh -r "${{ github.ref }}" -g "${{ matrix.group }}" -o $GITHUB_OUTPUT
- name: Post failure to slack
if: ${{ github.ref == 'refs/heads/master' && failure() }}
uses: slackapi/slack-github-action@91efab103c0de0a537f72a35f6b8cda0ee76bf0a
27 changes: 24 additions & 3 deletions internal/build/imgsrc/docker.go
@@ -301,6 +301,7 @@ func newRemoteDockerClient(ctx context.Context, apiClient flyutil.Client, flapsC

if !connectOverWireguard && !wglessCompatible {
client := &http.Client{
Timeout: 30 * time.Second, // Add timeout for each request
Transport: &http.Transport{
DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
return tls.Dial("tcp", fmt.Sprintf("%s.fly.dev:443", app.Name), &tls.Config{})
@@ -322,9 +323,29 @@ func newRemoteDockerClient(ctx context.Context, apiClient flyutil.Client, flapsC
fmt.Fprintln(streams.Out, streams.ColorScheme().Yellow("👀 checking remote builder compatibility with wireguardless deploys ..."))
span.AddEvent("checking remote builder compatibility with wireguardless deploys")

res, err := client.Do(req)
// Retry with backoff to allow DNS propagation time
var res *http.Response
b := &backoff.Backoff{
Min: 2 * time.Second,
Max: 30 * time.Second,
Factor: 2,
Jitter: true,
}
maxRetries := 10 // Up to ~5 minutes total with backoff
for attempt := 0; attempt < maxRetries; attempt++ {
res, err = client.Do(req)
if err == nil {
break
}

if attempt < maxRetries-1 {
dur := b.Duration()
terminal.Debugf("Remote builder compatibility check failed (attempt %d/%d), retrying in %s (err: %v)\n", attempt+1, maxRetries, dur, err)
pause.For(ctx, dur)
}
}
if err != nil {
tracing.RecordError(span, err, "failed to get remote builder settings")
tracing.RecordError(span, err, "failed to get remote builder settings after retries")
return nil, err
}

@@ -594,7 +615,7 @@ func buildRemoteClientOpts(ctx context.Context, apiClient flyutil.Client, appNam
}

func waitForDaemon(parent context.Context, client *dockerclient.Client) (up bool, err error) {
ctx, cancel := context.WithTimeout(parent, 2*time.Minute)
ctx, cancel := context.WithTimeout(parent, 5*time.Minute) // 5 minutes for daemon to become responsive (includes DNS propagation time)
defer cancel()

b := &backoff.Backoff{
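Note on the retry loop above: the exponential backoff gives the freshly provisioned builder's <app>.fly.dev hostname time to propagate through DNS before the compatibility check gives up. Below is a minimal, self-contained sketch of that pattern, assuming the backoff.Backoff in the diff is github.com/jpillora/backoff (its field names and Duration() method match) and substituting standard-library calls for the flyctl helpers pause.For and terminal.Debugf; the function name and URL are illustrative, not part of the change.

package main

import (
	"context"
	"fmt"
	"net/http"
	"time"

	"github.com/jpillora/backoff"
)

// retryGet retries an HTTP GET with exponential backoff, stopping early if the
// context is cancelled. It mirrors the shape of the compatibility check above:
// up to ten attempts, waits growing from 2s toward a 30s cap, with jitter.
func retryGet(ctx context.Context, client *http.Client, url string, maxRetries int) (*http.Response, error) {
	b := &backoff.Backoff{
		Min:    2 * time.Second,
		Max:    30 * time.Second,
		Factor: 2,
		Jitter: true,
	}

	var res *http.Response
	var err error
	for attempt := 0; attempt < maxRetries; attempt++ {
		res, err = client.Get(url)
		if err == nil {
			return res, nil // success: hand the response to the caller
		}
		if attempt < maxRetries-1 {
			dur := b.Duration() // next wait: 2s, 4s, 8s, ... capped at 30s
			fmt.Printf("attempt %d/%d failed, retrying in %s: %v\n", attempt+1, maxRetries, dur, err)
			select {
			case <-time.After(dur): // wait out the backoff interval
			case <-ctx.Done(): // but bail out if the caller gives up
				return nil, ctx.Err()
			}
		}
	}
	return nil, err // all attempts failed; surface the last error
}

func main() {
	client := &http.Client{Timeout: 30 * time.Second} // per-request timeout, as in the diff
	resp, err := retryGet(context.Background(), client, "https://example.com/settings", 10)
	if err != nil {
		fmt.Println("gave up:", err)
		return
	}
	defer resp.Body.Close()
	fmt.Println("status:", resp.Status)
}

Jitter here spreads concurrent retries apart, so parallel builds that hit the same cold builder do not retry in lockstep.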
4 changes: 2 additions & 2 deletions internal/build/imgsrc/ensure_builder.go
@@ -531,7 +531,7 @@ func (p *Provisioner) createBuilder(ctx context.Context, region, builderName str
return nil, nil, retErr
}

retErr = flapsClient.Wait(ctx, builderName, mach, "started", 60*time.Second)
retErr = flapsClient.Wait(ctx, builderName, mach, "started", 180*time.Second) // 3 minutes for machine start + DNS propagation
if retErr != nil {
tracing.RecordError(span, retErr, "error waiting for builder machine to start")
return nil, nil, retErr
@@ -582,7 +582,7 @@ func restartBuilderMachine(ctx context.Context, appName string, builderMachine *
return err
}

if err := flapsClient.Wait(ctx, appName, builderMachine, "started", time.Second*60); err != nil {
if err := flapsClient.Wait(ctx, appName, builderMachine, "started", time.Second*180); err != nil { // 3 minutes for restart + DNS propagation
tracing.RecordError(span, err, "error waiting for builder machine to start")
return err
}
6 changes: 5 additions & 1 deletion internal/command/console/console.go
@@ -231,7 +231,11 @@ func runConsole(ctx context.Context) error {
consoleCommand = flag.GetString(ctx, "command")
}

return ssh.Console(ctx, sshClient, consoleCommand, true, params.Container)
// Allocate PTY only when no command is specified or when explicitly requested
// This matches the behavior of `fly ssh console`
allocPTY := consoleCommand == "" || flag.GetBool(ctx, "pty")

return ssh.Console(ctx, sshClient, consoleCommand, allocPTY, params.Container)
}

func selectMachine(ctx context.Context, app *fly.AppCompact, appConfig *appconfig.Config) (*fly.Machine, func(), error) {
4 changes: 3 additions & 1 deletion internal/command/deploy/machines_deploymachinesapp.go
@@ -107,7 +107,9 @@ func (md *machineDeployment) DeployMachinesApp(ctx context.Context) error {

if updateErr := md.updateReleaseInBackend(ctx, status, metadata); updateErr != nil {
if err == nil {
err = fmt.Errorf("failed to set final release status: %w", updateErr)
// Deployment succeeded, but we couldn't update the release status
// This is not critical enough to fail the entire deployment
terminal.Warnf("failed to set final release status after successful deployment: %v\n", updateErr)
} else {
terminal.Warnf("failed to set final release status after deployment failure: %v\n", updateErr)
}
7 changes: 4 additions & 3 deletions internal/command/scale/count_machines.go
@@ -317,14 +317,15 @@ func computeActions(appName string, machines []*fly.Machine, expectedGroupCounts
delete(mConfig.Env, "FLY_STANDBY_FOR")

for region, delta := range regionDiffs {
existingMachinesInRegion := perRegionMachines[region]
actions = append(actions, &planItem{
GroupName: groupName,
Region: region,
Delta: delta,
Machines: perRegionMachines[region],
Machines: existingMachinesInRegion,
LaunchMachineInput: &fly.LaunchMachineInput{Region: region, Config: mConfig, MinSecretsVersion: minvers},
Volumes: defaults.PopAvailableVolumes(mConfig, region, delta),
CreateVolumeRequest: defaults.CreateVolumeRequest(mConfig, region, delta),
CreateVolumeRequest: defaults.CreateVolumeRequest(mConfig, region, delta, len(existingMachinesInRegion)),
})
}
}
@@ -352,7 +353,7 @@ func computeActions(appName string, machines []*fly.Machine, expectedGroupCounts
Delta: delta,
LaunchMachineInput: &fly.LaunchMachineInput{Region: region, Config: mConfig, MinSecretsVersion: minvers},
Volumes: defaults.PopAvailableVolumes(mConfig, region, delta),
CreateVolumeRequest: defaults.CreateVolumeRequest(mConfig, region, delta),
CreateVolumeRequest: defaults.CreateVolumeRequest(mConfig, region, delta, 0), // No existing machines for new groups
})
}
}
10 changes: 8 additions & 2 deletions internal/command/scale/machine_defaults.go
@@ -118,17 +118,23 @@ func (d *defaultValues) PopAvailableVolumes(mConfig *fly.MachineConfig, region s
return availableVolumes
}

func (d *defaultValues) CreateVolumeRequest(mConfig *fly.MachineConfig, region string, delta int) *fly.CreateVolumeRequest {
func (d *defaultValues) CreateVolumeRequest(mConfig *fly.MachineConfig, region string, delta int, existingMachineCount int) *fly.CreateVolumeRequest {
if len(mConfig.Mounts) == 0 || delta <= 0 {
return nil
}
mount := mConfig.Mounts[0]

// Enable RequireUniqueZone for HA scenarios (when total machines in region > 1)
// This ensures volumes (and their attached machines) are distributed across different hosts
totalMachinesInRegion := existingMachineCount + delta
requireUniqueZone := totalMachinesInRegion > 1

return &fly.CreateVolumeRequest{
Name: mount.Name,
Region: region,
SizeGb: &mount.SizeGb,
Encrypted: fly.Pointer(mount.Encrypted),
RequireUniqueZone: fly.Pointer(false),
RequireUniqueZone: fly.Pointer(requireUniqueZone),
SnapshotID: d.snapshotID,
ComputeRequirements: mConfig.Guest,
ComputeImage: mConfig.Image,
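For context on the new existingMachineCount parameter: the unique-zone decision is based on the region's total machine count after scaling, not just the number of machines being added, so a group that already has one machine and gains two more still gets spread across hosts. A tiny standalone sketch with worked cases follows; the helper name is illustrative, since in the diff the computation sits inline in CreateVolumeRequest.

package main

import "fmt"

// requireUniqueZone mirrors the placement rule introduced above: once a region
// will hold more than one machine for the group, each newly created volume
// should be placed on a distinct physical host.
func requireUniqueZone(existingMachineCount, delta int) bool {
	totalMachinesInRegion := existingMachineCount + delta
	return totalMachinesInRegion > 1
}

func main() {
	fmt.Println(requireUniqueZone(0, 1)) // false: a single machine, no spreading needed
	fmt.Println(requireUniqueZone(1, 2)) // true: scaling an existing machine up to 3 in the region
	fmt.Println(requireUniqueZone(0, 2)) // true: a brand-new group that starts with 2 machines
}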
31 changes: 5 additions & 26 deletions scanner/rails_dockerfile_test.go
@@ -46,13 +46,8 @@ CMD ["rails", "server"]
err = os.WriteFile(filepath.Join(dir, "Dockerfile"), []byte(customDockerfile), 0644)
require.NoError(t, err)

// Change to test directory
originalDir, _ := os.Getwd()
defer os.Chdir(originalDir)
err = os.Chdir(dir)
require.NoError(t, err)

// Run the scanner - it should detect the Rails app
// No need to change directories, configureRails accepts a directory path
si, err := configureRails(dir, &ScannerConfig{SkipHealthcheck: true})
drainHealthcheckChannel() // Wait for goroutine to complete before cleanup

@@ -89,11 +84,7 @@ CMD ["rails", "server"]`
err = os.WriteFile(filepath.Join(dir, "Dockerfile"), []byte(customDockerfile), 0644)
require.NoError(t, err)

originalDir, _ := os.Getwd()
defer os.Chdir(originalDir)
err = os.Chdir(dir)
require.NoError(t, err)

// No need to change directories, configureRails accepts a directory path
si, err := configureRails(dir, &ScannerConfig{SkipHealthcheck: true})
drainHealthcheckChannel() // Wait for goroutine to complete before cleanup
require.NoError(t, err)
@@ -123,11 +114,7 @@ CMD ["rails", "server"]`
err = os.WriteFile(filepath.Join(dir, "Dockerfile"), []byte(customDockerfile), 0644)
require.NoError(t, err)

originalDir, _ := os.Getwd()
defer os.Chdir(originalDir)
err = os.Chdir(dir)
require.NoError(t, err)

// No need to change directories, configureRails accepts a directory path
si, err := configureRails(dir, &ScannerConfig{SkipHealthcheck: true})
drainHealthcheckChannel() // Wait for goroutine to complete before cleanup
require.NoError(t, err)
Expand All @@ -150,12 +137,8 @@ CMD ["rails", "server"]`

// Note: No Dockerfile created

originalDir, _ := os.Getwd()
defer os.Chdir(originalDir)
err = os.Chdir(dir)
require.NoError(t, err)

// This test would need bundle to not be available, which is hard to simulate
// No need to change directories, configureRails accepts a directory path
// The scanner will either find bundle (and try to use it) or not find it
// If bundle is not found and no Dockerfile exists, it should fail

@@ -199,11 +182,7 @@ EXPOSE 3000`
err = os.WriteFile(filepath.Join(dir, "Dockerfile"), []byte(customDockerfile), 0644)
require.NoError(t, err)

originalDir, _ := os.Getwd()
defer os.Chdir(originalDir)
err = os.Chdir(dir)
require.NoError(t, err)

// No need to change directories, configureRails accepts a directory path
si, err := configureRails(dir, &ScannerConfig{SkipHealthcheck: true})
drainHealthcheckChannel() // Wait for goroutine to complete before cleanup
require.NoError(t, err)
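The remaining tests follow the same pattern: build the fixture under t.TempDir() and pass the path straight to configureRails instead of calling os.Chdir, which keeps the tests free of shared working-directory state. A minimal sketch of that shape is below; the test name and fixture contents are illustrative (the real tests create fuller Rails fixtures), while the configureRails, ScannerConfig, and drainHealthcheckChannel calls are the ones used in the diff.

package scanner

import (
	"os"
	"path/filepath"
	"testing"

	"github.com/stretchr/testify/require"
)

// Sketch of the directory-argument pattern: no os.Chdir, just hand the
// fixture path to the scanner and drain the healthcheck goroutine afterwards.
func TestConfigureRailsWithExplicitDir(t *testing.T) {
	dir := t.TempDir()

	gemfile := `source "https://rubygems.org"
gem "rails"`
	require.NoError(t, os.WriteFile(filepath.Join(dir, "Gemfile"), []byte(gemfile), 0644))

	dockerfile := `FROM ruby:3.3
CMD ["rails", "server"]`
	require.NoError(t, os.WriteFile(filepath.Join(dir, "Dockerfile"), []byte(dockerfile), 0644))

	si, err := configureRails(dir, &ScannerConfig{SkipHealthcheck: true})
	drainHealthcheckChannel() // wait for the healthcheck goroutine before cleanup
	require.NoError(t, err)
	require.NotNil(t, si)
}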