From 3470804a69e985b43355b40e71faffa9ad29d679 Mon Sep 17 00:00:00 2001 From: bneradt Date: Thu, 10 Feb 2022 17:13:49 +0000 Subject: [PATCH] Revert "DNS: Fix lack of nameserver failover in low use circumstances. (#7843)" This reverts commit 2fbec37b64d9d01c4a8c45a80245b432b9484e18. I'm reverting this to test whether this commit introduced CI regression test instability. --- iocore/dns/DNS.cc | 28 +++++++--------------------- iocore/dns/P_DNSProcessor.h | 23 ++++++++--------------- 2 files changed, 15 insertions(+), 36 deletions(-) diff --git a/iocore/dns/DNS.cc b/iocore/dns/DNS.cc index 9ad2ea32285..e7e9ebd2bbd 100644 --- a/iocore/dns/DNS.cc +++ b/iocore/dns/DNS.cc @@ -524,12 +524,12 @@ DNSHandler::open_con(sockaddr const *target, bool failed, int icon, bool over_tc } return false; } else { + ns_down[icon] = 0; if (cur_con.eio.start(pd, &cur_con, EVENTIO_READ) < 0) { Error("[iocore_dns] open_con: Failed to add %d server to epoll list\n", icon); } else { - cur_con.num = icon; - ns_down[icon] = 0; - Debug("dns", "opening connection %s on fd %d SUCCEEDED for %d", ip_text, cur_con.fd, icon); + cur_con.num = icon; + Debug("dns", "opening connection %s SUCCEEDED for %d", ip_text, icon); } ret = true; } @@ -729,15 +729,6 @@ void DNSHandler::failover() { Debug("dns", "failover: initiating failover attempt, current name_server=%d", name_server); - if (!ns_down[name_server]) { - ip_text_buffer buff; - // mark this nameserver as down - Debug("dns", "failover: Marking nameserver %d as down", name_server); - ns_down[name_server] = 1; - Warning("connection to DNS server %s lost, marking as down", - ats_ip_ntop(&m_res->nsaddr_list[name_server].sa, buff, sizeof(buff))); - } - // no hope, if we have only one server if (m_res->nscount > 1) { ip_text_buffer buff1, buff2; @@ -775,8 +766,6 @@ DNSHandler::failover() ip_text_buffer buff; Warning("failover: connection to DNS server %s lost, retrying", ats_ip_ntop(&ip.sa, buff, sizeof(buff))); } - // Make sure retries are done even if no more requests. - this_ethread()->schedule_in(this, DNS_PRIMARY_RETRY_PERIOD); } /** Mark one of the nameservers as down. */ @@ -790,8 +779,6 @@ DNSHandler::rr_failure(int ndx) Debug("dns", "rr_failure: Marking nameserver %d as down", ndx); ns_down[ndx] = 1; Warning("connection to DNS server %s lost, marking as down", ats_ip_ntop(&m_res->nsaddr_list[ndx].sa, buff, sizeof(buff))); - // Make sure retries are done even if no more requests. - this_ethread()->schedule_in(this, DNS_PRIMARY_RETRY_PERIOD); } int nscount = m_res->nscount; @@ -984,11 +971,6 @@ DNSHandler::check_and_reset_tcp_conn() int DNSHandler::mainEvent(int event, Event *e) { - // If this was a scheduled retry event, clear the associated flag. - if (e && e->cookie == RETRY_COOKIE) { - this->nameserver_retry_in_flight_p = false; - } - recv_dns(event, e); if (dns_ns_rr) { if (DNS_CONN_MODE::TCP_RETRY == dns_conn_mode) { @@ -1035,6 +1017,10 @@ DNSHandler::mainEvent(int event, Event *e) write_dns(this); } + if (std::any_of(ns_down, ns_down + n_con, [](int f) { return f != 0; })) { + this_ethread()->schedule_at(this, DNS_PRIMARY_RETRY_PERIOD); + } + return EVENT_CONT; } diff --git a/iocore/dns/P_DNSProcessor.h b/iocore/dns/P_DNSProcessor.h index 7d0bba98d2c..4c16f32f993 100644 --- a/iocore/dns/P_DNSProcessor.h +++ b/iocore/dns/P_DNSProcessor.h @@ -169,16 +169,9 @@ struct DNSHandler : public Continuation { DNSConnection udpcon[MAX_NAMED]; Queue entries; Queue triggered; - int in_flight = 0; - int name_server = 0; - int in_write_dns = 0; - /// Rate limiter for down nameserver retries. - /// Don't schedule another if there is already one in flight. - std::atomic nameserver_retry_in_flight_p{false}; - /// Marker for event cookie to indicate it's a nameserver retry event. - /// @note Can't be @c constexpr because of the cast. - static inline void *const RETRY_COOKIE{reinterpret_cast(0x2)}; - + int in_flight = 0; + int name_server = 0; + int in_write_dns = 0; HostEnt *hostent_cache = nullptr; int ns_down[MAX_NAMED]; @@ -219,16 +212,16 @@ struct DNSHandler : public Continuation { (ink_hrtime)HRTIME_SECONDS(dns_failover_period)); Debug("dns", "\tdelta time is %" PRId64 "", (Thread::get_hrtime() - crossed_failover_number[i])); } - return ns_down[i] || (crossed_failover_number[i] && - ((Thread::get_hrtime() - crossed_failover_number[i]) > HRTIME_SECONDS(dns_failover_period))); + return (crossed_failover_number[i] && + ((Thread::get_hrtime() - crossed_failover_number[i]) > HRTIME_SECONDS(dns_failover_period))); } bool failover_soon(int i) { - return ns_down[i] || (crossed_failover_number[i] && - ((Thread::get_hrtime() - crossed_failover_number[i]) > - (HRTIME_SECONDS(dns_failover_try_period + failover_soon_number[i] * FAILOVER_SOON_RETRY)))); + return (crossed_failover_number[i] && + ((Thread::get_hrtime() - crossed_failover_number[i]) > + (HRTIME_SECONDS(dns_failover_try_period + failover_soon_number[i] * FAILOVER_SOON_RETRY)))); } void recv_dns(int event, Event *e);