|
31 | 31 | #ifndef WIN32 |
32 | 32 | # include <netinet/in.h> |
33 | 33 | # include <sys/socket.h> |
| 34 | +# include <sys/select.h> /* fd_set and select() (or sys/time.h on older BSDs) */ |
34 | 35 | #else /* WIN32 */ |
35 | 36 | # include "wincompat.h" |
36 | 37 | #endif /* WIN32 */ |
@@ -366,25 +367,106 @@ void net_starttls(nut_ctype_t *client, size_t numarg, const char **arg) |
366 | 367 | return; |
367 | 368 | } |
368 | 369 |
|
369 | | - ret = SSL_accept(client->ssl); |
370 | | - switch (ret) |
| 370 | + /* SSL_accept() on a non-blocking socket (which upsd uses) requires a |
| 371 | + * retry loop. When SSL_accept() returns -1 with SSL_ERROR_WANT_READ or |
| 372 | + * SSL_ERROR_WANT_WRITE it is signalling a non-fatal "not done yet" |
| 373 | + * condition: the TLS handshake needs more I/O turns to complete. |
| 374 | + * The correct response is to wait for the fd to become ready in the |
| 375 | + * indicated direction and call SSL_accept() again with the SAME ssl |
| 376 | + * object and arguments (per OpenSSL docs for all versions >= 0.9.x). |
| 377 | + * |
| 378 | + * On Linux the loopback is fast enough that the handshake nearly always |
| 379 | + * completes in a single call, masking this requirement. On BSD, macOS, |
| 380 | + * illumos/OmniOS/OpenIndiana and other non-Linux platforms the loopback |
| 381 | + * socket behaviour differs enough that WANT_READ/WANT_WRITE are returned |
| 382 | + * regularly, causing the previous single-shot code to treat a transient |
| 383 | + * condition as a fatal error and tear down the connection. |
| 384 | + * |
| 385 | + * The retry behaviour and the SSL_ERROR_WANT_* codes are identical |
| 386 | + * across all supported OpenSSL versions (0.9.x, 1.0.x, 1.1.x, 3.x): |
| 387 | + * the API contract has never changed in this regard. |
| 388 | + */ |
371 | 389 | { |
372 | | - case 1: |
373 | | - client->ssl_connected = 1; |
374 | | - upsdebugx(3, "SSL connected (%s)", SSL_get_version(client->ssl)); |
375 | | - break; |
| 390 | + int ssl_err; |
| 391 | + int ssl_retries = 0; |
| 392 | + /* Cap retries to avoid spinning forever on a broken socket. |
| 393 | + * 250 retries * 20 ms select() timeout = at most 5 s of waiting, which |
| 394 | + * is generous for a local handshake while being safe for CI timeouts. */ |
| 395 | + const int SSL_ACCEPT_MAX_RETRIES = 250; |
| 396 | + fd_set fds; |
| 397 | + struct timeval tv; |
| 398 | + |
| 399 | + ret = -1; |
| 400 | + while (ssl_retries < SSL_ACCEPT_MAX_RETRIES) { |
| 401 | + ret = SSL_accept(client->ssl); |
| 402 | + |
| 403 | + if (ret == 1) { |
| 404 | + client->ssl_connected = 1; |
| 405 | + upsdebugx(3, "SSL_accept succeeded (%s)", |
| 406 | + SSL_get_version(client->ssl)); |
| 407 | + break; |
| 408 | + } |
376 | 409 |
|
377 | | - case 0: |
378 | | - upslog_with_errno(LOG_ERR, "SSL_accept do not accept handshake."); |
379 | | - ssl_error(client->ssl, ret); |
380 | | - break; |
| 410 | + ssl_err = SSL_get_error(client->ssl, ret); |
381 | 411 |
|
382 | | - case -1: |
383 | | - upslog_with_errno(LOG_ERR, "Unknown return value from SSL_accept"); |
384 | | - ssl_error(client->ssl, ret); |
385 | | - break; |
386 | | - default: |
387 | | - break; |
| 412 | + if (ssl_err == SSL_ERROR_WANT_READ |
| 413 | + || ssl_err == SSL_ERROR_WANT_WRITE |
| 414 | + ) { |
| 415 | + /* Non-fatal: handshake needs another I/O turn. |
| 416 | + * Wait up to 20 ms for the fd to be ready, then |
| 417 | + * retry SSL_accept() with the same ssl object. */ |
| 418 | + FD_ZERO(&fds); |
| 419 | + FD_SET(client->sock_fd, &fds); |
| 420 | + tv.tv_sec = 0; |
| 421 | + tv.tv_usec = 20000; /* 20 ms */ |
| 422 | + |
| 423 | + upsdebugx(4, |
| 424 | + "%s: SSL_accept WANT_%s, retry %d/%d", |
| 425 | + __func__, |
| 426 | + (ssl_err == SSL_ERROR_WANT_READ) |
| 427 | + ? "READ" : "WRITE", |
| 428 | + ssl_retries + 1, |
| 429 | + SSL_ACCEPT_MAX_RETRIES); |
| 430 | + |
| 431 | + if (select(client->sock_fd + 1, |
| 432 | + (ssl_err == SSL_ERROR_WANT_READ) ? &fds : NULL, |
| 433 | + (ssl_err == SSL_ERROR_WANT_WRITE) ? &fds : NULL, |
| 434 | + NULL, &tv) < 0 |
| 435 | + ) { |
| 436 | + upslog_with_errno(LOG_ERR, |
| 437 | + "%s: select() failed during SSL_accept", |
| 438 | + __func__); |
| 439 | + ssl_error(client->ssl, ret); |
| 440 | + return; |
| 441 | + } |
| 442 | + ssl_retries++; |
| 443 | + continue; |
| 444 | + } |
| 445 | + |
| 446 | + /* Any other error is fatal */ |
| 447 | + if (ret == 0) { |
| 448 | + upslog_with_errno(LOG_ERR, |
| 449 | + "%s: SSL_accept did not accept handshake" |
| 450 | + " (SSL_ERROR %d)", |
| 451 | + __func__, ssl_err); |
| 452 | + } else { |
| 453 | + upslog_with_errno(LOG_ERR, |
| 454 | + "%s: SSL_accept failed" |
| 455 | + " (SSL_ERROR %d)", |
| 456 | + __func__, ssl_err); |
| 457 | + } |
| 458 | + ssl_error(client->ssl, ret); |
| 459 | + return; |
| 460 | + } |
| 461 | + |
| 462 | + if (ssl_retries >= SSL_ACCEPT_MAX_RETRIES) { |
| 463 | + upslogx(LOG_ERR, |
| 464 | + "%s: SSL_accept timed out after %d retries" |
| 465 | + " (non-blocking handshake never completed)", |
| 466 | + __func__, ssl_retries); |
| 467 | + ssl_error(client->ssl, ret); |
| 468 | + return; |
| 469 | + } |
388 | 470 | } |
389 | 471 |
|
390 | 472 | # elif defined(WITH_NSS) /* not WITH_OPENSSL */ |
|
0 commit comments