diff --git a/.asf.yaml b/.asf.yaml
index 823d187874e..d6fa4d21130 100644
--- a/.asf.yaml
+++ b/.asf.yaml
@@ -33,7 +33,7 @@ github:
strict: false
contexts:
- AuTest
- - CentOS
+ - Rocky
- Clang-Analyzer
- Clang-Format
- Debian
diff --git a/build/hiredis.m4 b/build/hiredis.m4
index 1a9a18c0273..b2f09179be0 100644
--- a/build/hiredis.m4
+++ b/build/hiredis.m4
@@ -34,7 +34,7 @@ AC_ARG_WITH(hiredis, [AS_HELP_STRING([--with-hiredis=DIR],[use a specific hiredi
case "$hiredis_base_dir" in
*":"*)
- hidredis_include="`echo $hiredis_base_dir |sed -e 's/:.*$//'`"
+ hiredis_include="`echo $hiredis_base_dir |sed -e 's/:.*$//'`"
hiredis_ldflags="`echo $hiredis_base_dir |sed -e 's/^.*://'`"
AC_MSG_CHECKING(for hiredis includes in $hiredis_include libs in $hiredis_ldflags )
;;
diff --git a/build/libswoc.m4 b/build/libswoc.m4
new file mode 100644
index 00000000000..e4e03b9faf6
--- /dev/null
+++ b/build/libswoc.m4
@@ -0,0 +1,97 @@
+dnl -------------------------------------------------------- -*- autoconf -*-
+dnl Licensed to the Apache Software Foundation (ASF) under one or more
+dnl contributor license agreements. See the NOTICE file distributed with
+dnl this work for additional information regarding copyright ownership.
+dnl The ASF licenses this file to You under the Apache License, Version 2.0
+dnl (the "License"); you may not use this file except in compliance with
+dnl the License. You may obtain a copy of the License at
+dnl
+dnl http://www.apache.org/licenses/LICENSE-2.0
+dnl
+dnl Unless required by applicable law or agreed to in writing, software
+dnl distributed under the License is distributed on an "AS IS" BASIS,
+dnl WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+dnl See the License for the specific language governing permissions and
+dnl limitations under the License.
+
+dnl
+dnl libswoc.m4: Trafficserver's libswoc autoconf macros
+dnl
+
+dnl
+dnl TS_CHECK_LIBSWOC: look for libswoc libraries and headers. Accepts --with-libswoc=DIR (DIR/include, DIR/lib) or --with-libswoc=INCLUDE_DIR:LIB_DIR; substitutes SWOC_INCLUDES, SWOC_LIBS, SWOC_LDFLAGS (in-tree lib/swoc paths when the option is absent).
+dnl NOTE(review): the probe below tests for libswoc/yaml.h and never resets has_libswoc on "not found" -- confirm both are intended.
+AC_DEFUN([TS_CHECK_LIBSWOC], [
+has_libswoc=no
+AC_ARG_WITH(libswoc, [AS_HELP_STRING([--with-libswoc=DIR],[use a specific libswoc library])],
+[
+ if test "x$withval" != "xyes" && test "x$withval" != "x"; then
+ libswoc_base_dir="$withval"
+ if test "$withval" != "no"; then
+ has_libswoc=yes
+ case "$withval" in
+ *":"*)
+ swoc_include="`echo $withval |sed -e 's/:.*$//'`"
+ swoc_ldflags="`echo $withval |sed -e 's/^.*://'`"
+ AC_MSG_CHECKING([for libswoc includes in $swoc_include libs in $swoc_ldflags])
+ ;;
+ *)
+ swoc_include="$withval/include"
+ swoc_ldflags="$withval/lib"
+ libswoc_base_dir="$withval"
+ AC_MSG_CHECKING([for libswoc includes in $swoc_include libs in $swoc_ldflags])
+ ;;
+ esac
+ fi
+ fi
+
+ if test -d $swoc_include && test -d $swoc_ldflags && test -f $swoc_include/libswoc/yaml.h; then
+ AC_MSG_RESULT([ok])
+ else
+ AC_MSG_RESULT([not found])
+ fi
+
+if test "$has_libswoc" != "no"; then
+ saved_ldflags=$LDFLAGS
+ saved_cppflags=$CPPFLAGS
+
+ SWOC_LIBS=-lswoc
+ if test "$libswoc_base_dir" != "/usr"; then
+ SWOC_INCLUDES=-I${swoc_include}
+ SWOC_LDFLAGS=-L${swoc_ldflags}
+
+ TS_ADDTO_RPATH(${swoc_ldflags})
+ fi
+
+ if test "$swoc_include" != "0"; then
+ SWOC_INCLUDES=-I${swoc_include}
+ else
+ has_libswoc=no
+ CPPFLAGS=$saved_cppflags
+ LDFLAGS=$saved_ldflags
+ fi
+fi
+],
+[
+ has_libswoc=no
+ SWOC_INCLUDES=-I\${abs_top_srcdir}/lib/swoc/include
+ SWOC_LIBS=-lswoc
+ SWOC_LDFLAGS=-L\${abs_top_builddir}/lib/swoc
+])
+
+AC_SUBST([SWOC_INCLUDES])
+AC_SUBST([SWOC_LIBS])
+AC_SUBST([SWOC_LDFLAGS])
+
+])
+
+dnl TS_CHECK_SWOC_HEADERS_EXPORT: check if we want to export libswoc headers from trafficserver (--enable-swoc-headers). Sets enable_swoc_headers; default: not exported
+AC_DEFUN([TS_CHECK_SWOC_HEADERS_EXPORT], [
+AC_MSG_CHECKING([whether to export libswoc headers])
+AC_ARG_ENABLE([swoc-headers],
+ [AS_HELP_STRING([--enable-swoc-headers],[Export libswoc headers])],
+ [],
+ [enable_swoc_headers=no]
+)
+AC_MSG_RESULT([$enable_swoc_headers])
+])
diff --git a/build/nuraft.m4 b/build/nuraft.m4
new file mode 100644
index 00000000000..dca9e96a9bb
--- /dev/null
+++ b/build/nuraft.m4
@@ -0,0 +1,85 @@
+dnl -------------------------------------------------------- -*- autoconf -*-
+dnl Licensed to the Apache Software Foundation (ASF) under one or more
+dnl contributor license agreements. See the NOTICE file distributed with
+dnl this work for additional information regarding copyright ownership.
+dnl The ASF licenses this file to You under the Apache License, Version 2.0
+dnl (the "License"); you may not use this file except in compliance with
+dnl the License. You may obtain a copy of the License at
+dnl
+dnl http://www.apache.org/licenses/LICENSE-2.0
+dnl
+dnl Unless required by applicable law or agreed to in writing, software
+dnl distributed under the License is distributed on an "AS IS" BASIS,
+dnl WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+dnl See the License for the specific language governing permissions and
+dnl limitations under the License.
+
+dnl
+dnl nuraft.m4: Trafficserver's nuraft autoconf macros
+dnl
+
+dnl
+dnl TS_CHECK_NURAFT: look for nuraft libraries and headers. Accepts --with-nuraft=DIR (DIR/include, DIR/lib) or --with-nuraft=INCLUDE_DIR:LIB_DIR.
+dnl Sets has_nuraft (yes/no) and substitutes NURAFT_INCLUDES, NURAFT_LIBS and NURAFT_LDFLAGS. Uses AS_HELP_STRING; AC_HELP_STRING is obsolete.
+
+AC_DEFUN([TS_CHECK_NURAFT], [
+has_nuraft=no
+AC_ARG_WITH(nuraft, [AS_HELP_STRING([--with-nuraft=DIR], [use a specific nuraft library])],
+[
+ if test "x$withval" != "xyes" && test "x$withval" != "x"; then
+ nuraft_base_dir="$withval"
+ if test "$withval" != "no"; then
+ has_nuraft=yes
+ case "$withval" in
+ *":"*)
+ nuraft_include="`echo $withval | sed -e 's/:.*$//'`"
+ nuraft_ldflags="`echo $withval | sed -e 's/^.*://'`"
+ AC_MSG_CHECKING(for nuraft includes in $nuraft_include libs in $nuraft_ldflags)
+ ;;
+ *)
+ nuraft_include="$withval/include"
+ nuraft_ldflags="$withval/lib"
+ nuraft_base_dir="$withval"
+ AC_MSG_CHECKING(for nuraft includes in $nuraft_include libs in $nuraft_ldflags)
+ ;;
+ esac
+ fi
+ fi
+
+ if test -d $nuraft_include && test -d $nuraft_ldflags && test -f $nuraft_include/libnuraft/nuraft.hxx; then
+ AC_MSG_RESULT([ok])
+ else
+ AC_MSG_RESULT([not found])
+ fi
+
+if test "$has_nuraft" != "no"; then
+ saved_ldflags=$LDFLAGS
+ saved_cppflags=$CPPFLAGS
+
+ NURAFT_LIBS=-lnuraft
+ if test "$nuraft_base_dir" != "/usr"; then
+ NURAFT_INCLUDES=-I${nuraft_include}
+ NURAFT_LDFLAGS=-L${nuraft_ldflags}
+
+ TS_ADDTO(CPPFLAGS, [${NURAFT_INCLUDES}])
+ TS_ADDTO(LDFLAGS, [${NURAFT_LDFLAGS}])
+ TS_ADDTO_RPATH(${nuraft_ldflags})
+ fi
+
+ if test "$nuraft_include" != "0"; then
+ NURAFT_INCLUDES=-I${nuraft_include}
+ else
+ has_nuraft=no
+ CPPFLAGS=$saved_cppflags
+ LDFLAGS=$saved_ldflags
+ fi
+fi
+],
+[
+ has_nuraft=no
+])
+
+AC_SUBST([NURAFT_INCLUDES])
+AC_SUBST([NURAFT_LIBS])
+AC_SUBST([NURAFT_LDFLAGS])
+])
diff --git a/configs/body_factory/default/Makefile.am b/configs/body_factory/default/Makefile.am
index a24d2e290dc..69eb6b6810f 100644
--- a/configs/body_factory/default/Makefile.am
+++ b/configs/body_factory/default/Makefile.am
@@ -28,6 +28,7 @@ dist_bodyfactory_DATA = \
connect\#dns_failed \
connect\#failed_connect \
connect\#hangup \
+ connect\#all_dead \
default \
interception\#no_host \
README \
diff --git a/configs/body_factory/default/connect#all_dead b/configs/body_factory/default/connect#all_dead
new file mode 100644
index 00000000000..7e18a62986f
--- /dev/null
+++ b/configs/body_factory/default/connect#all_dead
@@ -0,0 +1,17 @@
+
+
+No Valid Host
+
+
+
+
No Valid Host
+
+
+
+Description: Unable to find a valid target host.
+
+The server was found but all of the addresses are marked dead and so there is
+no valid target address to which to connect. Please try again after a few minutes.
+
+
+
diff --git a/configure.ac b/configure.ac
index 456aa83d52d..1745358ed5b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -175,6 +175,13 @@ AC_ARG_ENABLE([all-asserts],
)
AC_MSG_RESULT([$enable_all_asserts])
+AC_MSG_CHECKING([whether to enable event tracker])
+AC_ARG_ENABLE([event-tracker],
+ [AS_HELP_STRING([--enable-event-tracker],[turn on event tracker])],
+ [],
+ [enable_event_tracker=no]
+)
+AC_MSG_RESULT([$enable_event_tracker])
# Enable code coverage instrumentation only if requested by the user.
AC_MSG_CHECKING([whether to code coverage])
@@ -856,7 +863,7 @@ case $host_os_def in
common_opt="-pipe -Wall -Wextra -Wno-ignored-qualifiers -Wno-unused-parameter -Wno-format-truncation -Wno-cast-function-type -Wno-stringop-overflow"
debug_opt="-ggdb3 $common_opt"
release_opt="-g $common_opt $optimizing_flags -feliminate-unused-debug-symbols -fno-strict-aliasing"
- cxx_opt="-Wno-invalid-offsetof -Wno-noexcept-type"
+ cxx_opt="-Wno-invalid-offsetof -Wno-noexcept-type -Wsuggest-override"
# Special options for flex generated .c files
flex_cflags="-Wno-unused-parameter"
])
@@ -897,7 +904,7 @@ case $host_os_def in
common_opt="-pipe -Wall -Wextra -Wno-ignored-qualifiers -Wno-unused-parameter"
debug_opt="-ggdb3 $common_opt"
release_opt="-g $common_opt $optimizing_flags -feliminate-unused-debug-symbols -fno-strict-aliasing"
- cxx_opt="-Wno-invalid-offsetof"
+ cxx_opt="-Wno-invalid-offsetof -Wsuggest-override"
])
AS_IF([test -d /usr/local/lib], [
@@ -975,10 +982,15 @@ fi
if test "x${enable_mime_sanity_check}" = "xyes"; then
TS_ADDTO(AM_CPPFLAGS, [-DENABLE_MIME_SANITY_CHECK])
fi
+
if test "x${enable_all_asserts}" = "xyes"; then
TS_ADDTO(AM_CPPFLAGS, [-DENABLE_ALL_ASSERTS])
fi
+if test "x${enable_event_tracker}" = "xyes"; then
+ TS_ADDTO(AM_CPPFLAGS, [-DENABLE_EVENT_TRACKER])
+fi
+
# Flags for ASAN
if test "x${enable_asan}" = "xyes"; then
if test "x${enable_tsan}" = "xyes" -o "x${enable_tsan}" = "xstatic"; then
@@ -1446,14 +1458,23 @@ AM_CONDITIONAL([BUILD_YAML_CPP], [test x"$has_yaml_cpp" = x"no"])
TS_CHECK_YAML_HEADERS_EXPORT
AM_CONDITIONAL([EXPORT_YAML_HEADERS], [test x"$enable_yaml_headers" = x"yes"])
+TS_CHECK_LIBSWOC
+AM_CONDITIONAL([BUILD_SWOC], [test x"$has_libswoc" = x"no"])
+
+TS_CHECK_SWOC_HEADERS_EXPORT
+AM_CONDITIONAL([EXPORT_SWOC_HEADER], [test x"$enable_swoc_headers" = x"yes"])
+
# Check for optional boringocsp library
TS_CHECK_BORINGOCSP
# Check for optional hiredis library
TS_CHECK_HIREDIS
-
AM_CONDITIONAL([BUILD_SSL_SESSION_REUSE_PLUGIN], [test ! -z "${LIB_HIREDIS}" -a "x${has_hiredis}" = "x1" ])
+# Check for optional nuraft library
+TS_CHECK_NURAFT
+AM_CONDITIONAL([BUILD_STEK_SHARE_PLUGIN], [test x"$has_nuraft" = x"yes"])
+
# Check for backtrace() support
has_backtrace=0
AC_CHECK_HEADERS([execinfo.h], [has_backtrace=1],[])
@@ -2312,13 +2333,15 @@ AC_MSG_NOTICE([Build option summary:
CXXFLAGS: $CXXFLAGS
CPPFLAGS: $CPPFLAGS
LDFLAGS: $LDFLAGS
- AM@&t@_CFLAGS: $AM_CFLAGS
- AM@&t@_CXXFLAGS: $AM_CXXFLAGS
- AM@&t@_CPPFLAGS: $AM_CPPFLAGS
- AM@&t@_LDFLAGS: $AM_LDFLAGS
+ AM@&t@_CFLAGS: $AM_CFLAGS
+ AM@&t@_CXXFLAGS: $AM_CXXFLAGS
+ AM@&t@_CPPFLAGS: $AM_CPPFLAGS
+ AM@&t@_LDFLAGS: $AM_LDFLAGS
TS_INCLUDES: $TS_INCLUDES
OPENSSL_LDFLAGS: $OPENSSL_LDFLAGS
OPENSSL_INCLUDES: $OPENSSL_INCLUDES
YAMLCPP_LDFLAGS: $YAMLCPP_LDFLAGS
YAMLCPP_INCLUDES: $YAMLCPP_INCLUDES
+ NURAFT_LDFLAGS: $NURAFT_LDFLAGS
+ NURAFT_INCLUDES: $NURAFT_INCLUDES
])
diff --git a/doc/Pipfile b/doc/Pipfile
index 42a9f5ad514..d481b2a75bc 100644
--- a/doc/Pipfile
+++ b/doc/Pipfile
@@ -23,15 +23,17 @@ verify_ssl = true
[packages]
-# The latest 4.x sphinx release, currently 4.0.2, fails `make html`. For
-# details, see: https://github.com/apache/trafficserver/issues/7938
-#
-# The 3.x releases build fine, however. So we currently pin to that.
+# The latest 4.x sphinx release has issues with style rendering. The 3.x
+# releases build and render fine, however. So we currently pin to that.
#
# Once that issue, either with sphinx or our docs, is resolved, then we should
# unpin sphinx by setting the following to "*".
sphinx = "==3.*"
+# Sphinx 3.x builds break with the latest jinja2. This jinja2 pin can be
+# removed when we move to Sphinx 4.x.
+jinja2 = "<3.1"
+
sphinx-rtd-theme = "*"
sphinxcontrib-plantuml = "*"
# i18n
diff --git a/doc/admin-guide/files/parent.config.en.rst b/doc/admin-guide/files/parent.config.en.rst
index 50fa96fe737..dcb12a0c77c 100644
--- a/doc/admin-guide/files/parent.config.en.rst
+++ b/doc/admin-guide/files/parent.config.en.rst
@@ -222,6 +222,7 @@ The following list shows the possible actions and their allowed values.
parent is marked down and a new parent is selected to retry the request. The number of
retries is controlled by ``max_unavailable_server_retries`` which is set to 1 by default.
- ``both`` - This enables both ``simple_retry`` and ``unavailable_server_retry`` as described above.
+ - If not set, by default all response codes will be considered a success, and parents will not be retried based on any HTTP response code.
.. Note::
@@ -270,7 +271,7 @@ The following list shows the possible actions and their allowed values.
- ``strict`` - Traffic Server machines serve requests strictly in
turn. For example: machine ``proxy1`` serves the first request,
``proxy2`` serves the second request, and so on.
- - ``false`` - Round robin selection does not occur.
+ - ``false`` - The default. Round robin selection does not occur.
- ``consistent_hash`` - consistent hash of the url so that one parent
is chosen for a given url. If a parent is down, the traffic that
would go to the down parent is rehashed amongst the remaining parents.
@@ -290,7 +291,7 @@ The following list shows the possible actions and their allowed values.
``go_direct``
One of the following values:
- - ``true`` - requests bypass parent hierarchies and go directly to
+ - ``true`` - The default. Requests bypass parent hierarchies and go directly to
the origin server.
- ``false`` - requests do not bypass parent hierarchies.
@@ -300,7 +301,7 @@ The following list shows the possible actions and their allowed values.
``qstring``
One of the following values:
- - ``consider`` - Use the query string when finding a parent.
+ - ``consider`` - The default. Use the query string when finding a parent.
- ``ignore`` - Do not consider the query string when finding a parent. This
is especially useful when using the ``consistent_hash`` selection strategy,
diff --git a/doc/admin-guide/files/records.config.en.rst b/doc/admin-guide/files/records.config.en.rst
index cc0747d3adf..c311690c8c8 100644
--- a/doc/admin-guide/files/records.config.en.rst
+++ b/doc/admin-guide/files/records.config.en.rst
@@ -2278,10 +2278,11 @@ Cache Control
:overridable:
When enabled (``1``), |TS| will attempt to write (lock) the URL
- to cache. This is rarely useful (at the moment), since it'll only be able
- to write to cache if the origin has ignored the ``Range:`` header. For a use
- case where you know the origin will respond with a full (``200``) response,
- you can turn this on to allow it to be cached.
+ to cache for a request specifying a range. This is useful when the origin server
+ might ignore a range request and respond with a full (``200``) response.
+ Additionally, this setting will attempt to transform a 200 response from the origin
+ server to a partial (``206``) response, honoring the requested range, while
+ caching the full response.
.. ts:cv:: CONFIG proxy.config.http.cache.ignore_accept_mismatch INT 2
:reloadable:
@@ -4064,15 +4065,13 @@ OCSP Stapling Configuration
Number of seconds before an OCSP response expires in the stapling cache.
- See :ref:`admin-performance-timeouts` for more discussion on |TS| timeouts.
-
.. ts:cv:: CONFIG proxy.config.ssl.ocsp.request_timeout INT 10
+ :units: seconds
Timeout (in seconds) for queries to OCSP responders.
- See :ref:`admin-performance-timeouts` for more discussion on |TS| timeouts.
-
.. ts:cv:: CONFIG proxy.config.ssl.ocsp.update_period INT 60
+ :units: seconds
Update period (in seconds) for stapling caches.
@@ -4116,11 +4115,13 @@ HTTP/2 Configuration
.. ts:cv:: CONFIG proxy.config.http2.initial_window_size_in INT 65535
:reloadable:
+ :units: bytes
The initial window size for inbound connections.
.. ts:cv:: CONFIG proxy.config.http2.max_frame_size INT 16384
:reloadable:
+ :units: bytes
Indicates the size of the largest frame payload that the sender is willing
to receive.
@@ -4153,6 +4154,7 @@ HTTP/2 Configuration
.. ts:cv:: CONFIG proxy.config.http2.active_timeout_in INT 0
:reloadable:
+ :units: seconds
This is the active timeout of the http2 connection. It is set when the connection is opened
and keeps ticking regardless of activity level.
@@ -4161,6 +4163,7 @@ HTTP/2 Configuration
.. ts:cv:: CONFIG proxy.config.http2.accept_no_activity_timeout INT 120
:reloadable:
+ :units: seconds
Specifies how long |TS| keeps connections to clients open if no
activity is received on the connection. Lowering this timeout can ease
@@ -4169,6 +4172,7 @@ HTTP/2 Configuration
.. ts:cv:: CONFIG proxy.config.http2.no_activity_timeout_in INT 120
:reloadable:
+ :units: seconds
Specifies how long |TS| keeps connections to clients open if a
transaction stalls. Lowering this timeout can ease pressure on the proxy if
@@ -4249,6 +4253,7 @@ HTTP/2 Configuration
.. ts:cv:: CONFIG proxy.config.http2.write_buffer_block_size INT 262144
:reloadable:
+ :units: bytes
Specifies the size of a buffer block that is used for buffering outgoing
HTTP/2 frames. The size will be rounded up based on power of 2.
@@ -4642,7 +4647,7 @@ Sockets
Sets the receive buffer size for connections from the client to |TS|.
-.. ts:cv:: CONFIG proxy.config.net.sock_option_flag_in INT 0x5
+.. ts:cv:: CONFIG proxy.config.net.sock_option_flag_in INT 0x1
Turns different options "on" for the socket handling client connections:::
diff --git a/doc/admin-guide/logging/formatting.en.rst b/doc/admin-guide/logging/formatting.en.rst
index 328dfb69f90..20ae0cfb0eb 100644
--- a/doc/admin-guide/logging/formatting.en.rst
+++ b/doc/admin-guide/logging/formatting.en.rst
@@ -636,6 +636,9 @@ cqssu Client Request SSL Elliptic Curve used by |TS| to communicate with the
cqssa Client Request ALPN Protocol ID negotiated with the client.
pqssl Proxy Request Indicates whether the connection from |TS| to the origin
was over SSL or not.
+pqssr Proxy Request SSL session ticket reused status from |TS| to the origin;
+ indicates if the current request hit the SSL session ticket
+ and avoided a full SSL handshake.
pscert Proxy Request 1 if origin requested certificate from |TS| during TLS
handshake but no client certificate was defined. 2 if origin
requested certificate from |TS| during TLS handshake and a
diff --git a/doc/admin-guide/monitoring/statistics/core/http-connection.en.rst b/doc/admin-guide/monitoring/statistics/core/http-connection.en.rst
index 0baa9dba815..e00885c294d 100644
--- a/doc/admin-guide/monitoring/statistics/core/http-connection.en.rst
+++ b/doc/admin-guide/monitoring/statistics/core/http-connection.en.rst
@@ -149,6 +149,16 @@ HTTP Connection
Tracks the number of client requests that did not have a request sent to the origin server because the origin server was marked dead.
+.. ts:stat:: global proxy.process.http.http_proxy_loop_detected integer
+ :type: counter
+
+ Counts the number of times a proxy loop was detected
+
+.. ts:stat:: global proxy.process.http.http_proxy_mh_loop_detected integer
+ :type: counter
+
+ Counts the number of times a multi-hop proxy loop was detected
+
HTTP/2
------
diff --git a/doc/admin-guide/plugins/cache_range_requests.en.rst b/doc/admin-guide/plugins/cache_range_requests.en.rst
index eb01960f707..e677bc528dd 100644
--- a/doc/admin-guide/plugins/cache_range_requests.en.rst
+++ b/doc/admin-guide/plugins/cache_range_requests.en.rst
@@ -118,6 +118,8 @@ X-Crr-Ims header support
.. option:: --consider-ims
.. option:: -c
+.. option:: --ims-header=[header name] (default: X-Crr-Ims)
+.. option:: -i
To support slice plugin self healing an option to force revalidation
after cache lookup complete was added. This option is triggered by a
@@ -137,6 +139,11 @@ In order for this to properly work in a CDN each cache in the
chain *SHOULD* also contain a remap rule with the
:program:`cache_range_requests` plugin with this option set.
+When used with the :program:`slice` plugin its `--crr-ims-header`
+option must have the same value (or not be defined) in order to work.
+
+Presence of the `--ims-header` option automatically sets the `--consider-ims` option.
+
Don't modify the Cache Key
--------------------------
diff --git a/doc/admin-guide/plugins/index.en.rst b/doc/admin-guide/plugins/index.en.rst
index a848475732b..12ff6ad50e5 100644
--- a/doc/admin-guide/plugins/index.en.rst
+++ b/doc/admin-guide/plugins/index.en.rst
@@ -172,6 +172,7 @@ directory of the |TS| source tree. Experimental plugins can be compiled by passi
Slice
SSL Headers
SSL Session Reuse
+ STEK Share
System Statistics
Traffic Dump
WebP Transform
@@ -265,6 +266,9 @@ directory of the |TS| source tree. Experimental plugins can be compiled by passi
:doc:`SSL Headers `
Populate request headers with SSL session information.
+:doc:`STEK Share `
+ Coordinates STEK (Session Ticket Encryption Key) between ATS instances running in a group.
+
:doc:`System Stats `
Inserts system statistics in to the stats list
diff --git a/doc/admin-guide/plugins/lua.en.rst b/doc/admin-guide/plugins/lua.en.rst
index 02b6421c25f..8ba07fbad81 100644
--- a/doc/admin-guide/plugins/lua.en.rst
+++ b/doc/admin-guide/plugins/lua.en.rst
@@ -155,7 +155,29 @@ adding a configuration option to records.config.
CONFIG proxy.config.plugin.lua.max_states INT 64
-Any per plugin --states value overrides this default value but must be less than or equal to this value. This setting is not reloadable since it must be applied when all the lua states are first initialized.
+Any per plugin --states value overrides this default value but must be less than or equal to this value. This setting is not
+reloadable since it must be applied when all the lua states are first initialized.
+
+For remap instances, the LuaJIT garbage collector can be set to be called automatically whenever a remap instance is created
+or deleted. This happens when the remap.config file has been modified, and the configuration has been reloaded. This does
+not apply to global plugin instances since these exist for the life-time of the ATS process, i.e., they are not reloadable or
+reconfigurable by modifying plugin.config while ATS is running.
+
+By default, the LuaJIT garbage collector will run on its own according to its own internal criteria. However, in some cases,
+the garbage collector should be run in a guaranteed fashion.
+
+For example, in Linux, total Lua memory may be limited to 2GB depending on the LuaJIT version. It may be required to release
+memory on demand in order to prevent out of memory errors when running close to the memory limit. Note that the memory usage
+is doubled during configuration reloads since the ATS must hold both the current and new configurations during the
+transition. If garbage collection does not occur immediately, memory usage may exceed this double usage.
+
+On demand garbage collection can be enabled by adding the following to each remap line. A value of '1' means
+enabled. The default value of '0' means disabled.
+
+::
+
+ map http://a.tbcdn.cn/ http://inner.tbcdn.cn/ @plugin=/XXX/tslua.so @pparam=--ljgc=1
+
Configuration for JIT mode
==========================
diff --git a/doc/admin-guide/plugins/money_trace.en.rst b/doc/admin-guide/plugins/money_trace.en.rst
index 41d7406dc45..02c63e6aeb0 100644
--- a/doc/admin-guide/plugins/money_trace.en.rst
+++ b/doc/admin-guide/plugins/money_trace.en.rst
@@ -18,37 +18,54 @@
Money Trace Plugin
-==================
+******************
-This is a remap plugin that allows ATS to participate in a distributed tracing system based upon
-the Comcast "Money" distributed tracing and monitoring library. The Comcast "Money" library has
-its roots in Google's Dapper and Twitters Zipkin systems. A money trace header or session id, is
-attached to transaction and allows an operator with the appropriate logging systems in place,
-to determine where errors and/or latency may exit.
+Description
+===========
-Use of the library enables the tracing of a transaction through all systems that participate in
-handling the request. See the documentation on this open source library at
-https://github.com/Comcast/money.
+This plugin allows ATS to participate in a distributed tracing system
+based upon the Comcast "Money" distributed tracing and monitoring library.
+The Comcast "Money" library has its roots in Google's Dapper and Twitter's
+Zipkin systems. A money trace header, or session id, is attached to a
+transaction and allows an operator with the appropriate logging systems
+in place to determine where errors and/or latency may exist.
+
+Use of the library enables the tracing of a transaction through all
+systems that participate in handling the request. See the documentation
+on this open source library at https://github.com/Comcast/money.
How it Works
-------------
-
-This plugin checks incoming requests for the "X-MoneyTrace" header. If the header is not present
-no further processing takes place. However if the header is present, the plugin will check to
-to see if the request has been cached. If so, the plugin will add the "X-Moneytrace" header from the
-incoming request to the cached response returned to the client as required by the money_trace
-protocol. If the request has not been cached, the plugin will extends the trace context by creating a new
-"X-MoneyTrace" header for inclusion in the outgoing request to a parent cache or origin server.
-The extended header includes the 'trace-id' from the incoming request, the incoming span-id
-becomes the outgoing parent-id and the plugin generates a new random long span id for the outgoing request.
-See the documentation at the link above for a complete description on the "X-MoneyTrace" header and how
-to use and extend it in a distributed tracing system.
+============
+
+This plugin checks incoming requests for the "X-MoneyTrace" header.
+If the header is not present no further processing takes place.
+However, if the header is present, the plugin will check to see if the
+request has been cached. If so, the plugin will add the "X-MoneyTrace"
+header from the incoming request to the cached response returned to the
+client as required by the money_trace protocol. If the request has not
+been cached, the plugin will extend the trace context by creating a new
+"X-MoneyTrace" header for inclusion in the outgoing request to a parent
+cache or origin server. The extended header includes the 'trace-id'
+from the incoming request, the incoming span-id becomes the outgoing
+parent-id and the plugin generates a new span id for the
+outgoing request using the current state machine id.
+
+See the documentation at the link above for a complete description on
+the "X-MoneyTrace" header and how to use and extend it in a distributed
+tracing system.
+
+A sample money-trace header:
+
+::
+
+ X-MoneyTrace: trace-id=aa234a23-189e-4cc4-98ed-b5327b1ec231-3;parent-id=0;span-id=4303691729133364974
Installation
-------------
+============
-The `Money Trace` plugin is a :term:`remap plugin`. Enable it by adding
-``money_trace.so`` to your :file:`remap.config` file. There are no options.
+The `Money Trace` plugin can be either a :term:`remap plugin` or
+:term:`global plugin`. Enable it by adding ``money_trace.so`` to your
+:file:`remap.config` file or :file:`plugin.config`.
Here is an example remap.config entry:
@@ -57,3 +74,69 @@ Here is an example remap.config entry:
map http://vod.foobar.com http://origin.vod.foobar.com @plugin=money_trace.so
.. _MoneyTrace: https://github.com/Comcast/money
+
+Configuration
+=============
+
+The plugin supports the following options:
+
+* ``--create-if-none=[true|false]`` (default: ``false``)
+
+If no X-MoneyTrace header is found in the client request one will
+be manufactured using the transaction UUID as trace-id,
+the transaction state machine id as span-id and parent-id set to '0'.
+
+* ``--global-skip-header=[header name]`` (default: null/disable)
+
+This setting only applies to a :term:`global plugin` instance
+and allows remap plugin instances to override :term:`global plugin`
+behavior by disabling the :term:`global plugin`
+
+Because a :term:`global plugin` runs before any :term:`remap plugin`
+in the remap phase a pregen header may still be created by the
+:term:`global plugin` if configured to do so.
+
+The global skip check is performed during the post remap phase in order
+to allow remap plugins (like `header rewrite`) to set this skip header.
+
+It is strongly suggested to use a private ATS header (begins with '@')
+as this value.
+
+* ``--header=[header name]`` (default: ``X-MoneyTrace``)
+
+Allows the money trace header to be overridden.
+
+* ``--passthru=[true|false]`` (default: ``false``)
+
+In this mode ATS acts transparently and passes the client money trace
+header through to the parent. It also returns this same header back to
+the client. This option ignores the --create-if-none setting.
+
+* ``--pregen-header=[header name]`` (default: null/disable)
+
+Normally the money trace header for a transaction is only added to the
+transaction server request headers. If this argument is supplied the
+header will be generated earlier in the transaction and added to the
+client request headers. Use this for debug or for logging the current
+transaction's money trace header. It is suggested to use a private
+ATS header (begins with a '@') for this value. A :file:`logging.yaml`
+entry with pregen-header=@MoneyTrace might look like:
+
+::
+
+ %<{@MoneyTrace}cqh>
+
+Robustness
+==========
+
+This plugin tries to be robust in its parsing. At a minimum the value
+must start with `trace-id=` set to a non-empty value.
+
+If `span-id=` is found in the header value that will be used as the
+parent-id for an upstream request. Otherwise '0' will be its value.
+
+If the incoming money trace header is invalid, it is handled based
+on the --create-if-none setting. If create-if-none is set a new
+money trace header will be generated and used. Otherwise the
+incoming client header value will be passed through.
+
diff --git a/doc/admin-guide/plugins/rate_limit.en.rst b/doc/admin-guide/plugins/rate_limit.en.rst
index ef3f3246f7a..5d51ac895c5 100644
--- a/doc/admin-guide/plugins/rate_limit.en.rst
+++ b/doc/admin-guide/plugins/rate_limit.en.rst
@@ -30,6 +30,17 @@ The limit counters and queues are per remap rule only, i.e. there is
(currently) no way to group transaction limits from different remap rules
into a single rate limiter.
+.. Note::
+ This is still work in progress; in particular, the configuration and
+ the IP reputation system need some work. Specifically:
+
+ * We need a proper YAML configuration overall, allowing us to configure
+ better per service controls as well as sharing resources between remap
+ rules or SNI.
+ * We need reloadable configurations.
+ * The IP reputation currently only works with the global plugin settings.
+ * There is no support for adding allow listed IPs to the IP reputation.
+
Remap Plugin
------------
@@ -96,7 +107,10 @@ Global Plugin
-------------
As a global plugin, the rate limiting currently applies only for TLS enabled
-connections, based on the SNI from the TLS handshake. The basic use is as::
+connections, based on the SNI from the TLS handshake. As a global plugin we
+also have the support of an IP reputation system; see below for configurations.
+
+The basic use is as::
rate_limit.so SNI=www1.example.com,www2.example.com --limit=2 --queue=2 --maxage=10000
@@ -144,6 +158,37 @@ The following options are available:
the plugin will use the FQDN of the SNI associated with each rate limiter instance
created during plugin initialization.
+.. option:: --iprep_buckets
+ The number of LRU buckets to use for the IP reputation. A good number here
+ is 10, but can be configured. The reason for the different buckets is to
+ account for a pseudo-sorted list of IPs on the frequency seen. Too few buckets
+ will not be enough to keep such a sorting, rendering the algorithm useless. To
+ function in our setup, the number of buckets must be less than ``100``.
+
+.. option:: --iprep_bucketsize
+ This is the size of the largest LRU bucket (the `entry bucket`), `15` is a good
+ value. This is a power of 2, so `15` means the largest LRU can hold `32768` entries.
+ Note that this option must be bigger than the `--iprep_buckets` setting, for the
+ bucket halving to function.
+
+.. option:: --iprep_maxage
+ This is used for aging out entries out of the LRU, the default is `0` which means
+ no aging happens. Even with no aging, entries will eventually fall out of buckets
+ because of the LRU mechanism that kicks in. The aging is here to make sure a spike
+ in traffic from an IP doesn't keep the entry for too long in the LRUs.
+
+.. option:: --iprep_permablock_limit
+ The minimum number of hits an IP must reach to get moved to the permanent bucket.
+ In this bucket, entries will stay for 2x
+
+.. option:: --iprep_permablock_pressure
+ This option specifies from which bucket an IP is allowed to move into the
+ perma block bucket. A good value here is likely `0` or `1`, which is very conservative.
+
+.. option:: --iprep_permablock_maxage
+ Similar to `--iprep_maxage` above, but only applies to the long term (`perma-block`)
+ bucket. Default is `0`, which means no aging to this bucket is applied.
+
Metrics
-------
Metric names are generated either using defaults or user-supplied values. In either
@@ -189,6 +234,21 @@ A user can specify their own prefixes and tags, but not types or metrics.
``resumed`` Queued connection is resumed.
============== ===================================================================
+IP Reputation
+-------------
+
+The goal of the IP reputation system is to simply try to identify IPs which are more
+likely to be abusive than others. It's not a perfect system, and it relies heavily on
+the notion of pressure. The Sieve LRUs are always filled, so you have to make sure that
+you only start using them when the system thinks it's under pressure.
+
+The Sieve LRU is a chained set of (configurable) LRUs, each with smaller and smaller
+capacity. This essentially adds a notion of partially sorted elements; all IPs in
+one LRU generally are more active than the IPs in the larger LRU before it. The final LRU is specially marked
+for longer term blocking, only the most abusive elements would end up here.
+
+.. figure:: /static/images/sdk/SieveLRU.png
+
Examples
--------
diff --git a/doc/admin-guide/plugins/slice.en.rst b/doc/admin-guide/plugins/slice.en.rst
index edb338906f2..8b56bbd362e 100644
--- a/doc/admin-guide/plugins/slice.en.rst
+++ b/doc/admin-guide/plugins/slice.en.rst
@@ -113,6 +113,26 @@ The slice plugin supports the following options::
Requires setting up an intermediate loopback remap rule.
-r for short
+ --skip-header= (default: X-Slicer-Info)
+ Header name used by the slice plugin after the loopback
+ to indicate that the slice plugin should be skipped.
+ -s for short
+
+ --crr-ims-header= (default: X-Crr-Ims)
+ Header name used by the slice plugin to tell the
+ `cache_range_requests` plugin that a request should
+ be marked as STALE. Used for self healing.
+ This must match the `--ims-header` option used by the
+ `cache_range_requests` plugin.
+ -i for short
+
+ --prefetch-count= (optional)
+ Default is 0
+ Prefetches successive 'n' slice block requests in the background
+ and cached. Especially for large objects, prefetching can improve
+ cache miss latency.
+ -f for short
+
Examples::
@plugin=slice.so @pparam=--blockbytes=1000000 @plugin=cache_range_requests.so
diff --git a/doc/admin-guide/plugins/stek_share.en.rst b/doc/admin-guide/plugins/stek_share.en.rst
new file mode 100644
index 00000000000..43ac28c6786
--- /dev/null
+++ b/doc/admin-guide/plugins/stek_share.en.rst
@@ -0,0 +1,105 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+
+.. include:: ../../common.defs
+
+.. _admin-plugins-stek_share:
+
+
+STEK Share Plugin
+*****************
+
+This plugin coordinates STEK (Session Ticket Encryption Key) between ATS instances running in a group.
+As the ID based session resumption rate continues to decrease, this new plugin will replace the
+:ref:`admin-plugins-ssl_session_reuse` plugin.
+
+
+How It Works
+============
+
+This plugin implements the `Raft consensus algorithm <https://raft.github.io/>`_ to decide on a leader. The leader will
+periodically create a new STEK key and share it with all other ATS boxes in the group. When the plugin starts up, it
+will automatically join the cluster of all other ATS boxes in the group, which will also automatically elect a leader.
+The plugin uses the `TSSslTicketKeyUpdate` call to update ATS with the latest two STEK's it has received.
+
+All communication is encrypted. All the ATS boxes participating in the STEK sharing must have access to the cert/key pair.
+
+Note that since this plugin only updates STEK every few hours, all Raft related state is kept in memory, and some code is
+borrowed from the examples from `NuRaft library <https://github.com/eBay/NuRaft>`_ that is used in this plugin.
+
+
+Building
+========
+
+This plugin uses `NuRaft library <https://github.com/eBay/NuRaft>`_ for leader election and communication.
+The NuRaft library must be installed for this plugin to build. It can be specified by the `--with-nuraft` argument to configure.
+
+This plugin also uses `YAML-CPP library <https://github.com/jbeder/yaml-cpp>`_ for reading the configuration file.
+The YAML-CPP library must be installed for this plugin to build. It can be specified by the `--with-yaml-cpp` argument to configure.
+
+As part of the experimental plugins, the `--enable-experimental-plugins` option must also be given to configure to build this plugin.
+
+
+Config File
+===========
+
+STEK Share is a global plugin. Its configuration file uses YAML, and is given as an argument to the plugin in :file:`plugin.config`.
+
+::
+ stek_share.so etc/trafficserver/example_server_conf.yaml
+
+Available options:
+
+* server_id - A unique ID for the server.
+* address - Hostname or IP address of the server.
+* port - Port number for communication.
+* asio_thread_pool_size - [Optional] Thread pool size for `ASIO library <http://think-async.com/Asio/>`_. Default size is 4.
+* heart_beat_interval - [Optional] Heart beat interval of Raft leader, must be less than "election_timeout_lower_bound". Default value is 100 ms.
+* election_timeout_lower_bound - [Optional] Lower bound of Raft leader election timeout. Default value is 200 ms.
+* election_timeout_upper_bound - [Optional] Upper bound of Raft leader election timeout. Default value is 400 ms.
+* reserved_log_items - [Optional] The maximum number of logs preserved ahead of the last snapshot. Default value is 5.
+* snapshot_distance - [Optional] The number of log appends for each snapshot. Default value is 5.
+* client_req_timeout - [Optional] Client request timeout. Default value is 3000 ms.
+* key_update_interval - The interval between STEK updates.
+* server_list_file - Path to a file containing information of all the servers that are supposed to be in the Raft cluster.
+* root_cert_file - Path to the root ca file.
+* server_cert_file - Path to the cert file.
+* server_key_file - Path to the key file.
+* cert_verify_str - SSL verification string, for example "/C=US/ST=IL/O=Yahoo/OU=Edge/CN=localhost"
+
+
+Example Config File
+===================
+
+.. literalinclude:: ../../../plugins/experimental/stek_share/example_server_conf.yaml
+
+
+Server List File
+================
+
+Server list file as mentioned above, also in YAML.
+
+* server_id - ID of the server.
+* address - Hostname or IP address of the server.
+* port - Port number of the server.
+
+
+Example Server List File
+========================
+
+.. literalinclude:: ../../../plugins/experimental/stek_share/example_server_list.yaml
diff --git a/doc/developer-guide/api/types/TSSslSession.en.rst b/doc/developer-guide/api/types/TSSslSession.en.rst
index 4f50ead4e4e..4b8c6f4836e 100644
--- a/doc/developer-guide/api/types/TSSslSession.en.rst
+++ b/doc/developer-guide/api/types/TSSslSession.en.rst
@@ -28,6 +28,8 @@ Synopsis
#include
+.. c:macro:: TS_SSL_MAX_SSL_SESSION_ID_LENGTH
+
.. type:: TSSslSessionID
.. member:: size_t len
diff --git a/doc/developer-guide/core-architecture/HostDB-Data-Layout.svg b/doc/developer-guide/core-architecture/HostDB-Data-Layout.svg
new file mode 100644
index 00000000000..9c02674a826
--- /dev/null
+++ b/doc/developer-guide/core-architecture/HostDB-Data-Layout.svg
@@ -0,0 +1,3 @@
+
+
+
\ No newline at end of file
diff --git a/doc/developer-guide/core-architecture/hostdb.en.rst b/doc/developer-guide/core-architecture/hostdb.en.rst
new file mode 100644
index 00000000000..33eef3c253a
--- /dev/null
+++ b/doc/developer-guide/core-architecture/hostdb.en.rst
@@ -0,0 +1,191 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+.. include:: ../../common.defs
+
+.. highlight:: cpp
+.. default-domain:: cpp
+
+.. _developer-doc-hostdb:
+
+HostDB
+******
+
+HostDB is a cache of DNS results. It is used to increase performance by aggregating address
+resolution across transactions. HostDB also stores state information for specific IP addresses.
+
+Operation
+=========
+
+The primary operation for HostDB is to resolve a fully qualified domain name ("FQDN"). As noted each
+FQDN is associated with a single record. Each record has an array of items. When a resolution
+request is made the database is checked to see if the record is already present. If so, it is
+served. Otherwise a DNS request is made. When the nameserver replies a record is created, added
+to the database, and then returned to the requestor.
+
+Each info tracks several status values for its corresponding upstream. These are
+
+* HTTP version
+* Last failure time
+
+The HTTP version is tracked from responses and provides a mechanism to make intelligent guesses
+about the protocol to use to the upstream.
+
+The last failure time tracks when the last connection failure to the info occurred and doubles as
+a flag, where a value of ``TS_TIME_ZERO`` indicates a live target and any other value indicates a
+dead info.
+
+If an info is marked dead (has a non-zero last failure time) there is a "fail window" during which
+no connections are permitted. After this time the info is considered to be a "zombie". If all infos
+for a record are dead then a specific error message is generated (body factory tag
+"connect#all_dead"). Otherwise if the selected info is a zombie, a request is permitted but the
+zombie is immediately marked dead again, preventing any additional requests until either the fail
+window has passed or the single connection succeeds. A successful connection clears the last failure
+time and the info becomes alive.
+
+Runtime Structure
+=================
+
+DNS results are stored in a global hash table as instances of ``HostDBRecord``. Each record stores
+the results of a single query. These records are not updated with new DNS results - instead a new
+record instance is created and replaces the previous instance in the table. The records are
+reference counted so such a replacement doesn't invalidate the old record if the latter is still
+being accessed. Some specific dynamic data is migrated from the old record to the new one, such as
+the failure status of the upstreams in the record.
+
+In each record is a variable length array of items, instances of ``HostDBInfo``, one for each
+IP address in the record. This is called the "round robin" data for historical reasons. For SRV
+records there is an additional storage area in the record that is used to store the SRV names.
+
+.. figure:: HostDB-Data-Layout.svg
+
+The round robin data is accessed by using an offset and count in the base record. For SRV records
+each record has an offset, relative to that ``HostDBInfo`` instance, for its own name in the name
+storage area.
+
+State information for the outbound connection has been moved to a refurbished ``DNSInfo`` class
+named ``ResolveInfo``. As much as possible relevant state information has been moved from the
+``HttpSM`` to this structure. This is intended for future work where the state machine deals only
+with upstream transactions and not sessions.
+
+``ResolveInfo`` may contain a reference to a HostDB record, which preserves the record even if it is
+replaced due to DNS queries in other transactions. The record is not required as the resolution
+information can be supplied directly without DNS or HostDB, e.g. a plugin sets the upstream address
+explicitly. The ``resolved_p`` flag indicates if the current information is valid and ready to be
+used or not. A result of this is there is no longer a specific holder for API provided addresses -
+the interface now puts the address in the ``ResolveInfo`` and marks it as resolved. This prevents
+further DNS / HostDB lookups and the address is used as is.
+
+The upstream port is a bit tricky and should be cleaned up. Currently the value in ``srv_port``
+determines the port if set. If not, then the port in ``addr`` is used.
+
+Resolution Style
+----------------
+
+.. cpp:enum:: OS_Addr
+
+ Metadata about the source of the resolved address.
+
+ .. cpp:enumerator:: TRY_DEFAULT
+
+ Use default resolution. This is the initial state.
+
+ .. cpp:enumerator:: TRY_HOSTDB
+
+ Use HostDB to resolve the target key.
+
+ .. cpp:enumerator:: TRY_CLIENT
+
+ Use the client supplied target address. This is used for transparent connections - the upstream
+ address is obtained from the inbound connection. May fail over to HostDB.
+
+ .. cpp:enumerator:: USE_HOSTDB
+
+ Use HostDB to resolve the target key.
+
+ .. cpp:enumerator:: USE_CLIENT
+
+ Use the client supplied target address.
+
+ .. cpp:enumerator:: USE_API
+
+ Use the address provided via the plugin API.
+
+ The parallel values for using HostDB and the client target address are to control fail over on
+ connection failure. The ``TRY_`` values can fail over to another style, but the ``USE_`` values
+ cannot. This prevents cycles of style changes by having any ``TRY_`` value fail over to a
+ ``USE_`` value, at which point it can no longer change. Note there is no ``TRY_API`` - if a
+ plugin sets the upstream address that is locked in.
+
+Issues
+======
+
+Currently if an upstream is marked down connections are still permitted, the only change is the
+number of retries. This has caused operational problems where dead systems are flooded with requests
+which, despite the timeouts, accumulate in ATS until ATS runs out of memory (there were instances of
+over 800K pending transactions). This also made it hard to bring the upstreams back online. With
+these changes requests to dead upstreams are strongly rate limited and other transactions are
+immediately terminated with a 502 response, protecting both the upstream and ATS.
+
+Future
+======
+
+There is still some work to be done in future PRs.
+
+* The fail window and the zombie window should be separate values. It is quite reasonable to want
+ to configure a very short fail window (possibly 0) with a moderately long zombie window so that
+ probing connections can immediately start going upstream at a low rate.
+
+* Failing an upstream should be more loosely connected to transactions. Currently there is a one
+ to one relationship where failure is defined as the failure of a specific transaction to connect.
+ There are situations where the number of connection attempts needed to mark a failure should be
+ larger than the number of retries for a single transaction. For transiently busy upstreams and
+ low latency requests it can be reasonable to tune the per transaction timeout low with no retries
+ but this then risks marking down upstreams that were merely a bit slow at a given moment.
+
+* Parallel DNS requests should be supported. This is for both cross family requests and for split
+ DNS.
+
+* It would be nice to be able to do the probing connections to an upstream using synthetic requests
+ instead of burning actual user requests. What would be needed is a handoff from ATS to the probe
+ to indicate a particular upstream is considered down, at which point active health checks are done
+ until the upstream is once again alive, at which point this is handed off back to ATS.
+
+History
+=======
+
+This version has several major architectural changes from the previous version.
+
+* The data is split into records and info, not handled as a variant of a single data type. This
+ provides a noticeable simplification of the code.
+
+* Single and multiple address results are treated identically - a singleton is simply a multiple
+ of size 1. This yields a major simplification of the implementation.
+
+* Connections are throttled to dead upstreams, allowing only a single connection attempt per fail
+ window timing until a connection succeeds.
+
+* Timing information is stored in ``std::chrono`` data types instead of proprietary types.
+
+* State information has been promoted to atomics and updates are immediate rather than scheduled.
+ This also means the data in the state machine is a reference to a shared object, not a local copy.
+ The promotion was necessary to coordinate zombie connections to dead upstreams across transactions.
+
+* The "resolve key" is now a separate data object from the HTTP request. This is a subtle but
+ major change. The effect is requests can be routed to different upstreams without changing
+ the request. Parent selection can be greatly simplified as it becomes merely a matter of setting
+ the resolve key, rather than having a completely different code path.
diff --git a/doc/developer-guide/core-architecture/index.en.rst b/doc/developer-guide/core-architecture/index.en.rst
index e88e35fb74e..97f59712d72 100644
--- a/doc/developer-guide/core-architecture/index.en.rst
+++ b/doc/developer-guide/core-architecture/index.en.rst
@@ -26,5 +26,6 @@ Core Architecture
:maxdepth: 1
heap.en
+ hostdb.en
rpc.en
url_rewrite_architecture.en.rst
diff --git a/doc/static/images/sdk/SieveLRU.png b/doc/static/images/sdk/SieveLRU.png
new file mode 100644
index 00000000000..3e138e46d21
Binary files /dev/null and b/doc/static/images/sdk/SieveLRU.png differ
diff --git a/doc/uml/host-resolve.plantuml b/doc/uml/host-resolve.plantuml
new file mode 100644
index 00000000000..f3c6a6091e9
--- /dev/null
+++ b/doc/uml/host-resolve.plantuml
@@ -0,0 +1,24 @@
+' SPDX-License-Identifier: Apache-2.0
+' Licensed under the Apache License, Version 2.0 (the "License");
+' you may not use this file except in compliance with the License.
+' You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
+' Unless required by applicable law or agreed to in writing, software distributed under the License is distributed
+' on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+' See the License for the specific language governing permissions and limitations under the License.
+
+@startuml
+
+hide empty description
+
+state HttpSM {
+ state do_http_server_open {
+ }
+}
+
+state HandleRequest #cyan
+state CallOSDNSLookup #cyan
+
+CallOSDNSLookup -> OSDNSLookup
+
+@enduml
+
diff --git a/example/plugins/c-api/protocol/TxnSM.c b/example/plugins/c-api/protocol/TxnSM.c
index 8f6ae5416a6..cb7f00f44ec 100644
--- a/example/plugins/c-api/protocol/TxnSM.c
+++ b/example/plugins/c-api/protocol/TxnSM.c
@@ -477,8 +477,6 @@ int
state_dns_lookup(TSCont contp, TSEvent event, TSHostLookupResult host_info)
{
TxnSM *txn_sm = (TxnSM *)TSContDataGet(contp);
- struct sockaddr const *q_server_addr;
- struct sockaddr_in ip_addr;
TSDebug(PLUGIN_NAME, "enter state_dns_lookup");
@@ -489,16 +487,16 @@ state_dns_lookup(TSCont contp, TSEvent event, TSHostLookupResult host_info)
txn_sm->q_pending_action = NULL;
/* Get the server IP from data structure TSHostLookupResult. */
- q_server_addr = TSHostLookupResultAddrGet(host_info);
+ struct sockaddr const *sa = TSHostLookupResultAddrGet(host_info);
/* Connect to the server using its IP. */
set_handler(txn_sm->q_current_handler, (TxnSMHandler)&state_connect_to_server);
TSAssert(txn_sm->q_pending_action == NULL);
- TSAssert(q_server_addr->sa_family == AF_INET); /* NO IPv6 in this plugin */
+ TSAssert(sa->sa_family == AF_INET); /* NO IPv6 in this plugin */
+ struct sockaddr_in *addr = (struct sockaddr_in *)(sa);
- memcpy(&ip_addr, q_server_addr, sizeof(ip_addr));
- ip_addr.sin_port = txn_sm->q_server_port;
- txn_sm->q_pending_action = TSNetConnect(contp, (struct sockaddr const *)&ip_addr);
+ addr->sin_port = txn_sm->q_server_port;
+ txn_sm->q_pending_action = TSNetConnect(contp, sa);
return TS_SUCCESS;
}
diff --git a/example/plugins/cpp-api/boom/boom.cc b/example/plugins/cpp-api/boom/boom.cc
index a312e7f2043..7a3a3234d66 100644
--- a/example/plugins/cpp-api/boom/boom.cc
+++ b/example/plugins/cpp-api/boom/boom.cc
@@ -98,13 +98,16 @@ GlobalPlugin *plugin;
// Functor that decides whether the HTTP error can be rewritten or not.
// Rewritable codes are: 2xx, 3xx, 4xx, 5xx and 6xx.
// 1xx is NOT rewritable!
-class IsRewritableCode : public std::unary_function
+class IsRewritableCode
{ // could probably be replaced with mem_ptr_fun()..
private:
int current_code_;
std::string current_code_string_;
public:
+ using argument_type = std::string;
+ using result_type = bool;
+
explicit IsRewritableCode(int current_code) : current_code_(current_code)
{
std::ostringstream oss;
diff --git a/include/ts/apidefs.h.in b/include/ts/apidefs.h.in
index e3ebe46528d..ac550a95fd6 100644
--- a/include/ts/apidefs.h.in
+++ b/include/ts/apidefs.h.in
@@ -437,8 +437,8 @@ typedef enum {
TS_LIFECYCLE_CLIENT_SSL_CTX_INITIALIZED_HOOK,
TS_LIFECYCLE_MSG_HOOK,
TS_LIFECYCLE_TASK_THREADS_READY_HOOK,
- TS_LIFECYCLE_SSL_SECRET_HOOK,
TS_LIFECYCLE_SHUTDOWN_HOOK,
+ TS_LIFECYCLE_SSL_SECRET_HOOK,
TS_LIFECYCLE_LAST_HOOK
} TSLifecycleHookID;
@@ -827,7 +827,6 @@ typedef enum {
TS_CONFIG_HTTP_CACHE_MAX_OPEN_WRITE_RETRIES,
TS_CONFIG_HTTP_REDIRECT_USE_ORIG_CACHE_KEY,
TS_CONFIG_HTTP_ATTACH_SERVER_SESSION_TO_CLIENT,
- TS_CONFIG_HTTP_MAX_PROXY_CYCLES,
TS_CONFIG_WEBSOCKET_NO_ACTIVITY_TIMEOUT,
TS_CONFIG_WEBSOCKET_ACTIVE_TIMEOUT,
TS_CONFIG_HTTP_UNCACHEABLE_REQUESTS_BYPASS_PARENT,
@@ -856,6 +855,11 @@ typedef enum {
TS_CONFIG_HTTP_SERVER_MIN_KEEP_ALIVE_CONNS,
TS_CONFIG_HTTP_PER_SERVER_CONNECTION_MAX,
TS_CONFIG_HTTP_PER_SERVER_CONNECTION_MATCH,
+#if TS_VERSION_MAJOR < 10
+ /* This is kept in the 9.x releases to preserve the ABI. Remove this in the
+ * 10 release. */
+ TS_CONFIG_SSL_CLIENT_VERIFY_SERVER,
+#endif
TS_CONFIG_SSL_CLIENT_VERIFY_SERVER_POLICY,
TS_CONFIG_SSL_CLIENT_VERIFY_SERVER_PROPERTIES,
TS_CONFIG_SSL_CLIENT_SNI_POLICY,
@@ -863,6 +867,7 @@ typedef enum {
TS_CONFIG_SSL_CLIENT_CA_CERT_FILENAME,
TS_CONFIG_HTTP_HOST_RESOLUTION_PREFERENCE,
TS_CONFIG_HTTP_CONNECT_DEAD_POLICY,
+ TS_CONFIG_HTTP_MAX_PROXY_CYCLES,
TS_CONFIG_PLUGIN_VC_DEFAULT_BUFFER_INDEX,
TS_CONFIG_PLUGIN_VC_DEFAULT_BUFFER_WATER_MARK,
TS_CONFIG_NET_SOCK_NOTSENT_LOWAT,
diff --git a/include/ts/experimental.h b/include/ts/experimental.h
index deef0ab3f6c..b2d020f4724 100644
--- a/include/ts/experimental.h
+++ b/include/ts/experimental.h
@@ -73,6 +73,29 @@ int64_t TSCacheHttpInfoSizeGet(TSCacheHttpInfo infop);
tsapi int TSMimeHdrFieldEqual(TSMBuffer bufp, TSMLoc hdr_obj, TSMLoc field1, TSMLoc field2);
tsapi TSReturnCode TSHttpTxnHookRegisteredFor(TSHttpTxn txnp, TSHttpHookID id, TSEventFunc funcp);
+#if TS_VERSION_MAJOR < 10
+
+/* These are kept in the 9.x releases to preserve the ABI. These should be
+ * removed in the ATS 10 release. */
+
+/* Various HTTP "control" modes */
+typedef enum {
+ TS_HTTP_CNTL_GET_LOGGING_MODE,
+ TS_HTTP_CNTL_SET_LOGGING_MODE,
+ TS_HTTP_CNTL_GET_INTERCEPT_RETRY_MODE,
+ TS_HTTP_CNTL_SET_INTERCEPT_RETRY_MODE
+} TSHttpCntlTypeExperimental;
+
+#define TS_HTTP_CNTL_OFF (void *)0
+#define TS_HTTP_CNTL_ON (void *)1
+/* usage:
+ void *onoff = 0;
+ TSHttpTxnCntl(.., TS_HTTP_CNTL_GET_LOGGING_MODE, &onoff);
+ if (onoff == TS_HTTP_CNTL_ON) ....
+*/
+tsapi TSReturnCode TSHttpTxnCntl(TSHttpTxn txnp, TSHttpCntlTypeExperimental cntl, void *data);
+
+#endif
/* Protocols APIs */
tsapi void TSVConnCacheHttpInfoSet(TSVConn connp, TSCacheHttpInfo infop);
diff --git a/include/ts/ts.h b/include/ts/ts.h
index dc1ac253ef7..90b2b7ef68e 100644
--- a/include/ts/ts.h
+++ b/include/ts/ts.h
@@ -1312,7 +1312,7 @@ tsapi TSReturnCode TSSslSecretUpdate(const char *secret_name, int secret_name_le
/* Create a new SSL context based on the settings in records.config */
tsapi TSSslContext TSSslServerContextCreate(TSSslX509 cert, const char *certname, const char *rsp_file);
tsapi void TSSslContextDestroy(TSSslContext ctx);
-tsapi void TSSslTicketKeyUpdate(char *ticketData, int ticketDataLen);
+tsapi TSReturnCode TSSslTicketKeyUpdate(char *ticketData, int ticketDataLen);
TSAcceptor TSAcceptorGet(TSVConn sslp);
TSAcceptor TSAcceptorGetbyID(int ID);
int TSAcceptorCount();
@@ -1950,7 +1950,13 @@ tsapi TSReturnCode TSPortDescriptorAccept(TSPortDescriptor, TSCont);
/* --------------------------------------------------------------------------
DNS Lookups */
tsapi TSAction TSHostLookup(TSCont contp, const char *hostname, size_t namelen);
+/** Retrieve an address from the host lookup.
+ *
+ * @param lookup_result Result handle passed to event callback.
+ * @return A @c sockaddr with the address if successful, a @c nullptr if not.
+ */
tsapi struct sockaddr const *TSHostLookupResultAddrGet(TSHostLookupResult lookup_result);
+
/* TODO: Eventually, we might want something like this as well, but it requires
support for building the HostDBInfo struct:
tsapi void TSHostLookupResultSet(TSHttpTxn txnp, TSHostLookupResult result);
diff --git a/include/tscore/BufferWriter.h b/include/tscore/BufferWriter.h
index 34e0b6bb541..85d0a74f83f 100644
--- a/include/tscore/BufferWriter.h
+++ b/include/tscore/BufferWriter.h
@@ -854,10 +854,10 @@ std::string &
bwprintv(std::string &s, ts::TextView fmt, std::tuple const &args)
{
auto len = s.size(); // remember initial size
- size_t n = ts::FixedBufferWriter(const_cast(s.data()), s.size()).printv(fmt, std::move(args)).extent();
+ size_t n = ts::FixedBufferWriter(const_cast(s.data()), s.size()).printv(fmt, args).extent();
s.resize(n); // always need to resize - if shorter, must clip pre-existing text.
if (n > len) { // dropped data, try again.
- ts::FixedBufferWriter(const_cast(s.data()), s.size()).printv(fmt, std::move(args));
+ ts::FixedBufferWriter(const_cast(s.data()), s.size()).printv(fmt, args);
}
return s;
}
diff --git a/include/tscore/BufferWriterForward.h b/include/tscore/BufferWriterForward.h
index 8da67c60b4c..7773486b92f 100644
--- a/include/tscore/BufferWriterForward.h
+++ b/include/tscore/BufferWriterForward.h
@@ -148,4 +148,11 @@ class BWFormat;
class BufferWriter;
+/// Storage for debug messages.
+/// If @c bwprint is used with this, the storage is reused which minimizes allocations.
+/// E.g.
+/// @code ts::bwprint(ts::bw_dbg, fmt, args...); @endcode
+
+inline thread_local std::string bw_dbg;
+
} // namespace ts
diff --git a/include/tscore/CryptoHash.h b/include/tscore/CryptoHash.h
index 767cb7b64fe..4c2d97e3ff6 100644
--- a/include/tscore/CryptoHash.h
+++ b/include/tscore/CryptoHash.h
@@ -23,6 +23,7 @@
#pragma once
#include "tscore/BufferWriter.h"
+#include
#include
/// Apache Traffic Server commons.
@@ -132,6 +133,9 @@ class CryptoContextBase
/// @note This is just as fast as the previous style, as a new context must be initialized
/// every time this is done.
bool hash_immediate(CryptoHash &hash, void const *data, int length);
+
+protected:
+ EVP_MD_CTX *_ctx = nullptr;
};
inline bool
@@ -159,29 +163,31 @@ class CryptoContext : public CryptoContextBase
UNSPECIFIED,
#if TS_ENABLE_FIPS == 0
MD5,
- MMH,
#endif
SHA256,
}; ///< What type of hash we really are.
static HashType Setting;
- /// Size of storage for placement @c new of hashing context.
- static size_t const OBJ_SIZE = 256;
+ ~CryptoContext()
+ {
+ delete _base;
+ _base = nullptr;
+ }
-protected:
- char _obj[OBJ_SIZE]; ///< Raw storage for instantiated context.
+private:
+ CryptoContextBase *_base = nullptr;
};
inline bool
CryptoContext::update(void const *data, int length)
{
- return reinterpret_cast(_obj)->update(data, length);
+ return _base->update(data, length);
}
inline bool
CryptoContext::finalize(CryptoHash &hash)
{
- return reinterpret_cast(_obj)->finalize(hash);
+ return _base->finalize(hash);
}
ts::BufferWriter &bwformat(ts::BufferWriter &w, ts::BWFSpec const &spec, ats::CryptoHash const &hash);
diff --git a/include/tscore/Diags.h b/include/tscore/Diags.h
index 53cfd759f37..50a708efdfa 100644
--- a/include/tscore/Diags.h
+++ b/include/tscore/Diags.h
@@ -159,6 +159,15 @@ extern Diags *diags;
} \
} while (0)
+#define Debug_bw(tag, fmt, ...) \
+ do { \
+ if (unlikely(diags->on())) { \
+ static const SourceLocation loc = MakeSourceLocation(); \
+ static LogMessage log_message; \
+ log_message.debug(tag, loc, "%s", ts::bwprint(ts::bw_dbg, fmt, __VA_ARGS__).c_str()); \
+ } \
+ } while (0)
+
/** Same as Debug above, but this allows a positive override of the tag
* mechanism by a flag boolean.
*
diff --git a/include/tscore/IntrusivePtr.h b/include/tscore/IntrusivePtr.h
index bfe63a3aee0..70cc671e394 100644
--- a/include/tscore/IntrusivePtr.h
+++ b/include/tscore/IntrusivePtr.h
@@ -330,9 +330,13 @@ template class IntrusivePtrDefaultPolicy
static void finalize(T *t);
/// Strict weak order for STL containers.
- class Order : public std::binary_function, IntrusivePtr, bool>
+ class Order
{
public:
+ using first_argument_type = IntrusivePtr;
+ using second_argument_type = IntrusivePtr;
+ using result_type = bool;
+
/// Default constructor.
Order() {}
/// Compare by raw pointer.
diff --git a/include/tscore/MD5.h b/include/tscore/MD5.h
index 3131197dfef..8b1e5bb26f7 100644
--- a/include/tscore/MD5.h
+++ b/include/tscore/MD5.h
@@ -29,9 +29,6 @@
class MD5Context : public ats::CryptoContextBase
{
-protected:
- EVP_MD_CTX *_ctx;
-
public:
MD5Context()
{
diff --git a/include/tscore/PluginUserArgs.h b/include/tscore/PluginUserArgs.h
index 4dd637b2158..b466829c1fb 100644
--- a/include/tscore/PluginUserArgs.h
+++ b/include/tscore/PluginUserArgs.h
@@ -86,7 +86,7 @@ template class PluginUserArgs : public virtual PluginUserArgsM
{
public:
void *
- get_user_arg(size_t ix) const
+ get_user_arg(size_t ix) const override
{
ink_release_assert(SanityCheckUserIndex(I, ix));
ix -= get_user_arg_offset(I);
@@ -95,7 +95,7 @@ template class PluginUserArgs : public virtual PluginUserArgsM
};
void
- set_user_arg(size_t ix, void *arg)
+ set_user_arg(size_t ix, void *arg) override
{
ink_release_assert(SanityCheckUserIndex(I, ix));
ix -= get_user_arg_offset(I);
diff --git a/include/tscore/Regression.h b/include/tscore/Regression.h
index 72e65fa2c4b..6c58973817a 100644
--- a/include/tscore/Regression.h
+++ b/include/tscore/Regression.h
@@ -101,3 +101,5 @@ struct RegressionTest {
int rprintf(RegressionTest *t, const char *format, ...);
int rperf(RegressionTest *t, const char *tag, double val);
const char *regression_status_string(int status);
+
+void tell_diags_regression_testing_is_on();
diff --git a/include/tscore/SHA256.h b/include/tscore/SHA256.h
index 9b3140a028c..446ae0cb896 100644
--- a/include/tscore/SHA256.h
+++ b/include/tscore/SHA256.h
@@ -29,26 +29,23 @@
class SHA256Context : public ats::CryptoContextBase
{
-protected:
- EVP_MD_CTX *ctx;
-
public:
SHA256Context()
{
- ctx = EVP_MD_CTX_new();
- EVP_DigestInit_ex(ctx, EVP_sha256(), nullptr);
+ _ctx = EVP_MD_CTX_new();
+ EVP_DigestInit_ex(_ctx, EVP_sha256(), nullptr);
}
- ~SHA256Context() { EVP_MD_CTX_free(ctx); }
+ ~SHA256Context() { EVP_MD_CTX_free(_ctx); }
/// Update the hash with @a data of @a length bytes.
bool
update(void const *data, int length) override
{
- return EVP_DigestUpdate(ctx, data, length);
+ return EVP_DigestUpdate(_ctx, data, length);
}
/// Finalize and extract the @a hash.
bool
finalize(CryptoHash &hash) override
{
- return EVP_DigestFinal_ex(ctx, hash.u8, nullptr);
+ return EVP_DigestFinal_ex(_ctx, hash.u8, nullptr);
}
};
diff --git a/include/tscore/bwf_std_format.h b/include/tscore/bwf_std_format.h
index e67c858fc0b..cb060edd170 100644
--- a/include/tscore/bwf_std_format.h
+++ b/include/tscore/bwf_std_format.h
@@ -26,6 +26,7 @@
#include
#include
#include
+#include
#include "tscpp/util/TextView.h"
#include "tscore/BufferWriterForward.h"
@@ -38,6 +39,20 @@ bwformat(ts::BufferWriter &w, ts::BWFSpec const &spec, atomic const &v)
return ts::bwformat(w, spec, v.load());
}
+/// Format a @c std::chrono::duration by formatting its tick count.
+/// The count is forwarded as a plain number in the duration's own period; no unit suffix is added here.
+template <typename Rep, typename Period>
+ts::BufferWriter &
+bwformat(ts::BufferWriter &w, ts::BWFSpec const &spec, chrono::duration<Rep, Period> const &d)
+{
+  return bwformat(w, spec, d.count());
+}
+
+/// Format a @c std::chrono::time_point as the duration elapsed since its clock's epoch.
+/// The work is delegated to the @c bwformat overload that accepts that duration type.
+template <typename Clock, typename Duration>
+ts::BufferWriter &
+bwformat(ts::BufferWriter &w, ts::BWFSpec const &spec, chrono::time_point<Clock, Duration> const &t)
+{
+  return bwformat(w, spec, t.time_since_epoch());
+}
+
} // end namespace std
namespace ts
@@ -130,5 +145,4 @@ namespace bwf
BufferWriter &bwformat(BufferWriter &w, BWFSpec const &spec, bwf::Errno const &e);
BufferWriter &bwformat(BufferWriter &w, BWFSpec const &spec, bwf::Date const &date);
BufferWriter &bwformat(BufferWriter &w, BWFSpec const &spec, bwf::OptionalAffix const &opts);
-
} // namespace ts
diff --git a/include/tscore/ink_base64.h b/include/tscore/ink_base64.h
index a3c4d510afc..1473a312a51 100644
--- a/include/tscore/ink_base64.h
+++ b/include/tscore/ink_base64.h
@@ -42,5 +42,16 @@ bool ats_base64_encode(const unsigned char *inBuffer, size_t inBufferSize, char
bool ats_base64_decode(const char *inBuffer, size_t inBufferSize, unsigned char *outBuffer, size_t outBufSize, size_t *length);
// Little helper functions to calculate minimum required output buffer for encoding/decoding.
-#define ATS_BASE64_ENCODE_DSTLEN(_length) ((_length * 8) / 6 + 4)
-#define ATS_BASE64_DECODE_DSTLEN(_length) (((_length + 3) / 4) * 3)
+// These sizes include one byte for null termination, because ats_base64_encode and ats_base64_decode will always write a null
+// terminator.
+// Smallest output buffer, in bytes, for encoding @a length input bytes: four output characters for every
+// started group of three input bytes, plus one byte for the null terminator.
+inline constexpr size_t
+ats_base64_encode_dstlen(size_t length)
+{
+  const size_t input_groups = (length + 2) / 3;
+  return input_groups * 4 + 1;
+}
+
+// Smallest output buffer, in bytes, for decoding @a length input characters: three output bytes for every
+// started group of four input characters, plus one byte for the null terminator.
+inline constexpr size_t
+ats_base64_decode_dstlen(size_t length)
+{
+  const size_t input_quads = (length + 3) / 4;
+  return input_quads * 3 + 1;
+}
diff --git a/include/tscore/ink_defs.h b/include/tscore/ink_defs.h
index 35141368371..0202649f2ac 100644
--- a/include/tscore/ink_defs.h
+++ b/include/tscore/ink_defs.h
@@ -24,19 +24,15 @@
#pragma once
#include "tscore/ink_config.h"
+
#include // NOLINT(modernize-deprecated-headers)
-#include
#ifdef HAVE_STDINT_H
#include // NOLINT(modernize-deprecated-headers)
-#else
-// TODO: Add "standard" int types?
#endif
#ifdef HAVE_INTTYPES_H
#include // NOLINT(modernize-deprecated-headers)
-#else
-// TODO: add PRI*64 stuff?
#endif
#ifndef INT64_MIN
@@ -61,11 +57,6 @@
#endif
#endif
-#if defined(darwin)
-#define RENTRENT_GETHOSTBYNAME
-#define RENTRENT_GETHOSTBYADDR
-#endif
-
#define NUL '\0'
// Determine the element count for an array.
@@ -100,26 +91,13 @@ countof(const T (&)[N])
#define unlikely(x) __builtin_expect(!!(x), 0)
#endif
-#if TS_USE_HWLOC
-#include
-#endif
-
-#if defined(MAP_NORESERVE)
-#define MAP_SHARED_MAP_NORESERVE (MAP_SHARED | MAP_NORESERVE)
-#else
-#define MAP_SHARED_MAP_NORESERVE (MAP_SHARED)
-#endif
-
/* Variables
*/
-extern int debug_level;
extern int off;
extern int on;
/* Functions
*/
-int ink_sys_name_release(char *name, int namelen, char *release, int releaselen);
-int ink_number_of_processors();
int ink_login_name_max();
#ifdef __cplusplus
@@ -136,8 +114,3 @@ ROUNDUP(ArithmeticV value, ArithmeticM m)
return value;
}
#endif
-
-#if TS_USE_HWLOC
-// Get the hardware topology
-hwloc_topology_t ink_get_topology();
-#endif
diff --git a/include/tscore/ink_hw.h b/include/tscore/ink_hw.h
new file mode 100644
index 00000000000..fc9d7a7a68a
--- /dev/null
+++ b/include/tscore/ink_hw.h
@@ -0,0 +1,33 @@
+/** @file
+
+ @section license License
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+
+#pragma once
+
+#include "tscore/ink_config.h"
+
+#if TS_USE_HWLOC
+#include <hwloc.h>
+
+// Get the hardware topology.
+// Only declared when TS_USE_HWLOC is set, because the return type comes from hwloc.h.
+hwloc_topology_t ink_get_topology();
+#endif
+
+int ink_number_of_processors();
diff --git a/include/tscore/ink_queue.h b/include/tscore/ink_queue.h
index ffddafefa16..4b8f02873e4 100644
--- a/include/tscore/ink_queue.h
+++ b/include/tscore/ink_queue.h
@@ -139,10 +139,10 @@ union head_p {
#define SET_FREELIST_POINTER_VERSION(_x, _p, _v) \
(_x).s.pointer = _p; \
(_x).s.version = _v
-#elif defined(__x86_64__) || defined(__ia64__) || defined(__powerpc64__) || defined(__aarch64__) || defined(__mips64)
+#elif defined(__x86_64__) || defined(__ia64__) || defined(__powerpc64__) || defined(__mips64)
/* Layout of FREELIST_POINTER
*
- * 0 ~ 47 bits : 48 bits, Virtual Address (47 bits for AMD64 and 48 bits for AArch64)
+ * 0 ~ 47 bits : 48 bits, Virtual Address
* 48 ~ 62 bits : 15 bits, Freelist Version
* 63 bits : 1 bits, The type of Virtual Address (0 = user space, 1 = kernel space)
*/
@@ -158,11 +158,30 @@ union head_p {
#else
/* the shift is `logical' */
#define FREELIST_POINTER(_x) \
- ((void *)((((intptr_t)(_x).data) & 0x0000FFFFFFFFFFFFLL) | (((~((((intptr_t)(_x).data) >> 63) - 1)) >> 48) << 48)))
+ ((void *)((((intptr_t)(_x).data) & 0x0000FFFFFFFFFFFFLL) | ((~((((intptr_t)(_x).data) >> 63) - 1)) << 48)))
#endif
#define FREELIST_VERSION(_x) ((((intptr_t)(_x).data) & 0x7FFF000000000000LL) >> 48)
#define SET_FREELIST_POINTER_VERSION(_x, _p, _v) (_x).data = ((((intptr_t)(_p)) & 0x8000FFFFFFFFFFFFLL) | (((_v)&0x7FFFLL) << 48))
+#elif defined(__aarch64__)
+/* Layout of FREELIST_POINTER
+ *
+ * 0 ~ 51 bits : 52 bits, Virtual Address
+ * 52 ~ 62 bits : 11 bits, Freelist Version
+ * 63 bits : 1 bits, The type of Virtual Address (0 = user space, 1 = kernel space)
+ */
+#if ((~0 >> 1) < 0)
+/* the shift is `arithmetic' */
+#define FREELIST_POINTER(_x) \
+ ((void *)((((intptr_t)(_x).data) & 0x000FFFFFFFFFFFFFLL) | ((((intptr_t)(_x).data) >> 63) << 52))) // sign extend
+#else
+/* the shift is `logical' */
+#define FREELIST_POINTER(_x) \
+ ((void *)((((intptr_t)(_x).data) & 0x000FFFFFFFFFFFFFLL) | ((~((((intptr_t)(_x).data) >> 63) - 1)) << 52)))
+#endif
+
+#define FREELIST_VERSION(_x) ((((intptr_t)(_x).data) & 0x7FF0000000000000LL) >> 52)
+#define SET_FREELIST_POINTER_VERSION(_x, _p, _v) (_x).data = ((((intptr_t)(_p)) & 0x800FFFFFFFFFFFFFLL) | (((_v)&0x7FFLL) << 52))
#else
#error "unsupported processor"
#endif
diff --git a/include/tscore/ink_stack_trace.h b/include/tscore/ink_stack_trace.h
index 5d10fd03894..ff759b8b802 100644
--- a/include/tscore/ink_stack_trace.h
+++ b/include/tscore/ink_stack_trace.h
@@ -29,8 +29,15 @@
#ifdef __cplusplus
extern "C" {
#endif
+
/* dumps the current back trace to stderr */
void ink_stack_trace_dump();
+
+/**
+ Get symbol of @n-th frame
+*/
+const void *ink_backtrace(const int n);
+
#ifdef __cplusplus
}
#endif
diff --git a/include/tscore/ts_file.h b/include/tscore/ts_file.h
index c4389e948f6..8a9eff2befc 100644
--- a/include/tscore/ts_file.h
+++ b/include/tscore/ts_file.h
@@ -329,5 +329,12 @@ namespace file
/* ------------------------------------------------------------------- */
} // namespace file
+
+/// BufferWriter formatting support for @c file::path.
+/// The path is formatted by forwarding the result of @c path.string() to the matching @c bwformat overload,
+/// so @a spec is applied exactly as it would be for that string.
+inline BufferWriter &
+bwformat(BufferWriter &w, BWFSpec const &spec, file::path const &path)
+{
+ return bwformat(w, spec, path.string());
+}
+
} // namespace ts
/* ------------------------------------------------------------------- */
diff --git a/include/tscpp/api/Headers.h b/include/tscpp/api/Headers.h
index 9e6daea8ef4..ffaffde81b5 100644
--- a/include/tscpp/api/Headers.h
+++ b/include/tscpp/api/Headers.h
@@ -109,12 +109,15 @@ class HeaderField;
/**
* @brief A header field value iterator iterates through all header fields.
*/
-class header_field_value_iterator : public std::iterator
+class header_field_value_iterator
{
private:
HeaderFieldValueIteratorState *state_;
public:
+ using iterator_category = std::forward_iterator_tag;
+ using value_type = int;
+
/**
* Constructor for header_field_value_iterator, this shouldn't need to be used directly.
* @param bufp the TSMBuffer associated with the headers
@@ -169,13 +172,16 @@ class header_field_value_iterator : public std::iterator
+class header_field_iterator
{
private:
HeaderFieldIteratorState *state_;
header_field_iterator(void *hdr_buf, void *hdr_loc, void *field_loc);
public:
+ using iterator_category = std::forward_iterator_tag;
+ using value_type = int;
+
~header_field_iterator();
/**
diff --git a/include/tscpp/util/Makefile.am b/include/tscpp/util/Makefile.am
index 6d073e61124..a1d18fd1900 100644
--- a/include/tscpp/util/Makefile.am
+++ b/include/tscpp/util/Makefile.am
@@ -22,5 +22,7 @@ library_include_HEADERS = \
IntrusiveDList.h \
LocalBuffer.h \
PostScript.h \
+ Strerror.h \
+ string_view_util.h \
TextView.h \
TsSharedMutex.h
diff --git a/include/tscpp/util/Strerror.h b/include/tscpp/util/Strerror.h
new file mode 100644
index 00000000000..eb1e6731ca5
--- /dev/null
+++ b/include/tscpp/util/Strerror.h
@@ -0,0 +1,82 @@
+/** @file
+
+ A convenience wrapper for the thread-safe strerror_r() function, either
+ GNU or XSI version. Allows the avoidance of use of the thread-unsafe
+ strerror() function.
+
+ @section license License
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+
+#pragma once
+
+#include <string.h>
+
+namespace ts
+{
+// Typically this class is used to create anonymous temporaries, for example:
+//
+// if ((fd = open(file_path, O_RDONLY)) < 0) {
+// Error("%s Can not open %s file : %s", module_name, file_path, Strerror(errno).c_str());
+// return nullptr;
+// }
+//
+class Strerror
+{
+public:
+  /// Look up the message for @a err_num. The text is available from c_str() for the lifetime of this object.
+  Strerror(int err_num)
+  {
+    _buf[0] = '\0';
+
+    // Handle either GNU or XSI version of strerror_r(). Overload resolution on the return type of
+    // strerror_r() selects the matching _message() helper, so no preprocessor checks are needed.
+    //
+    char const *msg = _message(strerror_r(err_num, _buf, sizeof(_buf)), _buf);
+
+    // Guarantee termination in case the message filled the whole buffer.
+    _buf[sizeof(_buf) - 1] = '\0';
+
+    _c_str = (msg != nullptr) ? msg : "strerror_r() call failed";
+
+    // Make sure there are no unused function warnings.
+    //
+    static_cast<void>(_message(0, _buf));
+    static_cast<void>(_message(nullptr, _buf));
+  }
+
+  /// @return The null-terminated error message, or a fixed fallback text if the lookup failed.
+  char const *
+  c_str() const
+  {
+    return (_c_str);
+  }
+
+private:
+  char _buf[256];
+  char const *_c_str;
+
+  // XSI strerror_r(): returns 0 on success and writes the message into the caller's buffer.
+  static char const *
+  _message(int retval, char const *buf)
+  {
+    return retval == 0 ? buf : nullptr;
+  }
+
+  // GNU strerror_r(): returns the message itself. That may be a static string rather than the caller's
+  // buffer (which is then left unwritten), so the returned pointer is what must be used.
+  static char const *
+  _message(char *retval, char const *)
+  {
+    return retval;
+  }
+};
+
+} // end namespace ts
diff --git a/include/tscpp/util/TsSharedMutex.h b/include/tscpp/util/TsSharedMutex.h
index 3ac7f2ea948..df0fd2c23ee 100644
--- a/include/tscpp/util/TsSharedMutex.h
+++ b/include/tscpp/util/TsSharedMutex.h
@@ -25,7 +25,7 @@
#pragma once
#include
-#include
+#include
#if __has_include()
#include
@@ -54,49 +54,6 @@
namespace ts
{
-class Strerror
-{
-public:
- Strerror(int err_num)
- {
- // Handle either GNU or XSI version of strerror_r().
- //
- if (!_success(strerror_r(err_num, _buf, 256))) {
- _c_str = "strerror_r() call failed";
- } else {
- _buf[255] = '\0';
- _c_str = _buf;
- }
-
- // Make sure there are no unused function warnings.
- //
- static_cast(_success(0));
- static_cast(_success(nullptr));
- }
-
- char const *
- c_str() const
- {
- return (_c_str);
- }
-
-private:
- char _buf[256];
- char const *_c_str;
-
- bool
- _success(int retval)
- {
- return retval == 0;
- }
-
- bool
- _success(char *retval)
- {
- return retval != nullptr;
- }
-};
-
// A class with the same interface as std::shared_mutex, but which is not prone to writer starvation.
//
class shared_mutex
diff --git a/iocore/aio/AIO.cc b/iocore/aio/AIO.cc
index df8e27bea8f..9384ed7c6b4 100644
--- a/iocore/aio/AIO.cc
+++ b/iocore/aio/AIO.cc
@@ -25,7 +25,8 @@
* Async Disk IO operations.
*/
-#include
+#include "tscore/TSSystemState.h"
+#include "tscore/ink_hw.h"
#include "P_AIO.h"
diff --git a/iocore/aio/test_AIO.cc b/iocore/aio/test_AIO.cc
index e3dccef256d..bef0a8ea3b7 100644
--- a/iocore/aio/test_AIO.cc
+++ b/iocore/aio/test_AIO.cc
@@ -23,6 +23,7 @@
#include "P_AIO.h"
#include "InkAPIInternal.h"
+#include "tscore/ink_hw.h"
#include "tscore/I_Layout.h"
#include "tscore/TSSystemState.h"
#include "tscore/Random.h"
diff --git a/iocore/cache/Cache.cc b/iocore/cache/Cache.cc
index edb825a4a82..e22e5674587 100644
--- a/iocore/cache/Cache.cc
+++ b/iocore/cache/Cache.cc
@@ -1434,7 +1434,7 @@ Vol::handle_recover_from_data(int event, void * /* data ATS_UNUSED */)
{
uint32_t got_len = 0;
uint32_t max_sync_serial = header->sync_serial;
- char *s, *e;
+ char *s, *e = nullptr;
if (event == EVENT_IMMEDIATE) {
if (header->sync_serial == 0) {
io.aiocb.aio_buf = nullptr;
diff --git a/iocore/cache/CacheHosting.cc b/iocore/cache/CacheHosting.cc
index 41c995742ff..60cb6890929 100644
--- a/iocore/cache/CacheHosting.cc
+++ b/iocore/cache/CacheHosting.cc
@@ -112,9 +112,7 @@ CacheHostMatcher::Match(const char *rdata, int rlen, CacheHostResult *result) co
return;
}
- char *data = static_cast(ats_malloc(rlen + 1));
- memcpy(data, rdata, rlen);
- *(data + rlen) = '\0';
+ std::string_view data{rdata, static_cast(rlen)};
HostLookupState s;
r = host_lookup->MatchFirst(data, &s, &opaque_ptr);
@@ -122,11 +120,10 @@ CacheHostMatcher::Match(const char *rdata, int rlen, CacheHostResult *result) co
while (r == true) {
ink_assert(opaque_ptr != nullptr);
data_ptr = static_cast(opaque_ptr);
- data_ptr->UpdateMatch(result, data);
+ data_ptr->UpdateMatch(result);
r = host_lookup->MatchNext(&s, &opaque_ptr);
}
- ats_free(data);
}
//
@@ -490,18 +487,14 @@ CacheHostRecord::Init(matcher_line *line_info, CacheType typ)
const char *errptr = "A volume number expected";
RecSignalWarning(REC_SIGNAL_CONFIG_ERROR, "%s discarding %s entry at line %d :%s", "[CacheHosting]", config_file,
line_info->line_num, errptr);
- if (val != nullptr) {
- ats_free(val);
- }
+ ats_free(val);
return -1;
}
}
if ((*s < '0') || (*s > '9')) {
RecSignalWarning(REC_SIGNAL_CONFIG_ERROR, "%s discarding %s entry at line %d : bad token [%c]", "[CacheHosting]",
config_file, line_info->line_num, *s);
- if (val != nullptr) {
- ats_free(val);
- }
+ ats_free(val);
return -1;
}
s++;
@@ -537,9 +530,7 @@ CacheHostRecord::Init(matcher_line *line_info, CacheType typ)
if (!is_vol_present) {
RecSignalWarning(REC_SIGNAL_CONFIG_ERROR, "%s discarding %s entry at line %d : bad volume number [%d]",
"[CacheHosting]", config_file, line_info->line_num, volume_number);
- if (val != nullptr) {
- ats_free(val);
- }
+ ats_free(val);
return -1;
}
if (c == '\0') {
@@ -583,7 +574,7 @@ CacheHostRecord::Init(matcher_line *line_info, CacheType typ)
}
void
-CacheHostRecord::UpdateMatch(CacheHostResult *r, char * /* rd ATS_UNUSED */)
+CacheHostRecord::UpdateMatch(CacheHostResult *r)
{
r->record = this;
}
diff --git a/iocore/cache/CacheTest.cc b/iocore/cache/CacheTest.cc
index 97f1301b541..f1d50fd7e48 100644
--- a/iocore/cache/CacheTest.cc
+++ b/iocore/cache/CacheTest.cc
@@ -334,7 +334,7 @@ EXCLUSIVE_REGRESSION_TEST(cache)(RegressionTest *t, int /* atype ATS_UNUSED */,
CACHE_SM(
t, replace_write_test,
- { cacheProcessor.open_write(this, &key, CACHE_FRAG_TYPE_NONE, 100, CACHE_WRITE_OPT_SYNC); } int open_write_callout() {
+ { cacheProcessor.open_write(this, &key, CACHE_FRAG_TYPE_NONE, 100, CACHE_WRITE_OPT_SYNC); } int open_write_callout() override {
header.serial = 10;
cache_vc->set_header(&header, sizeof(header));
cvio = cache_vc->do_io_write(this, nbytes, buffer_reader);
@@ -347,18 +347,19 @@ EXCLUSIVE_REGRESSION_TEST(cache)(RegressionTest *t, int /* atype ATS_UNUSED */,
CACHE_SM(
t, replace_test,
- { cacheProcessor.open_write(this, &key, CACHE_FRAG_TYPE_NONE, 100, CACHE_WRITE_OPT_OVERWRITE_SYNC); } int open_write_callout() {
- CacheTestHeader *h = nullptr;
- int hlen = 0;
- if (cache_vc->get_header((void **)&h, &hlen) < 0)
- return -1;
- if (h->serial != 10)
- return -1;
- header.serial = 11;
- cache_vc->set_header(&header, sizeof(header));
- cvio = cache_vc->do_io_write(this, nbytes, buffer_reader);
- return 1;
- });
+ { cacheProcessor.open_write(this, &key, CACHE_FRAG_TYPE_NONE, 100, CACHE_WRITE_OPT_OVERWRITE_SYNC); } int open_write_callout()
+ override {
+ CacheTestHeader *h = nullptr;
+ int hlen = 0;
+ if (cache_vc->get_header((void **)&h, &hlen) < 0)
+ return -1;
+ if (h->serial != 10)
+ return -1;
+ header.serial = 11;
+ cache_vc->set_header(&header, sizeof(header));
+ cvio = cache_vc->do_io_write(this, nbytes, buffer_reader);
+ return 1;
+ });
replace_test.expect_initial_event = CACHE_EVENT_OPEN_WRITE;
replace_test.expect_event = VC_EVENT_WRITE_COMPLETE;
replace_test.nbytes = 100;
@@ -366,7 +367,7 @@ EXCLUSIVE_REGRESSION_TEST(cache)(RegressionTest *t, int /* atype ATS_UNUSED */,
replace_test.content_salt = 1;
CACHE_SM(
- t, replace_read_test, { cacheProcessor.open_read(this, &key); } int open_read_callout() {
+ t, replace_read_test, { cacheProcessor.open_read(this, &key); } int open_read_callout() override {
CacheTestHeader *h = nullptr;
int hlen = 0;
if (cache_vc->get_header((void **)&h, &hlen) < 0)
@@ -389,7 +390,7 @@ EXCLUSIVE_REGRESSION_TEST(cache)(RegressionTest *t, int /* atype ATS_UNUSED */,
rand_CacheKey(&large_write_test.key, thread->mutex);
CACHE_SM(
- t, pread_test, { cacheProcessor.open_read(this, &key); } int open_read_callout() {
+ t, pread_test, { cacheProcessor.open_read(this, &key); } int open_read_callout() override {
cvio = cache_vc->do_io_pread(this, nbytes, buffer, 7000000);
return 1;
});
diff --git a/iocore/cache/CacheWrite.cc b/iocore/cache/CacheWrite.cc
index 605588d790f..20baeb393f8 100644
--- a/iocore/cache/CacheWrite.cc
+++ b/iocore/cache/CacheWrite.cc
@@ -288,6 +288,13 @@ iobufferblock_memcpy(char *p, int len, IOBufferBlock *ab, int offset)
EvacuationBlock *
Vol::force_evacuate_head(Dir *evac_dir, int pinned)
{
+ auto bucket = dir_evac_bucket(evac_dir);
+ if (!evac_bucket_valid(bucket)) {
+ DDebug("cache_evac", "dir_evac_bucket out of bounds, skipping evacuate: %ld(%d), %d, %d", bucket, evacuate_size,
+ (int)dir_offset(evac_dir), (int)dir_phase(evac_dir));
+ return nullptr;
+ }
+
// build an evacuation block for the object
EvacuationBlock *b = evacuation_block_exists(evac_dir, this);
// if we have already started evacuating this document, its too late
@@ -300,7 +307,7 @@ Vol::force_evacuate_head(Dir *evac_dir, int pinned)
b = new_EvacuationBlock(mutex->thread_holding);
b->dir = *evac_dir;
DDebug("cache_evac", "force: %d, %d", (int)dir_offset(evac_dir), (int)dir_phase(evac_dir));
- evacuate[dir_evac_bucket(evac_dir)].push(b);
+ evacuate[bucket].push(b);
}
b->f.pinned = pinned;
b->f.evacuate_head = 1;
@@ -500,7 +507,7 @@ CacheVC::evacuateDocDone(int /* event ATS_UNUSED */, Event * /* e ATS_UNUSED */)
(int)dir_phase(&overwrite_dir), (int)dir_offset(&dir), (int)dir_phase(&dir));
int i = dir_evac_bucket(&overwrite_dir);
// nasty beeping race condition, need to have the EvacuationBlock here
- EvacuationBlock *b = vol->evacuate[i].head;
+ EvacuationBlock *b = vol->evac_bucket_valid(i) ? vol->evacuate[i].head : nullptr;
for (; b; b = b->link.next) {
if (dir_offset(&b->dir) == dir_offset(&overwrite_dir)) {
// If the document is single fragment (although not tied to the vector),
@@ -651,6 +658,7 @@ Vol::evacuateDocReadDone(int event, Event *e)
Doc *doc = reinterpret_cast(doc_evacuator->buf->data());
CacheKey next_key;
EvacuationBlock *b = nullptr;
+ auto bucket = dir_evac_bucket(&doc_evacuator->overwrite_dir);
if (doc->magic != DOC_MAGIC) {
Debug("cache_evac", "DOC magic: %X %d", (int)dir_tag(&doc_evacuator->overwrite_dir),
(int)dir_offset(&doc_evacuator->overwrite_dir));
@@ -660,7 +668,9 @@ Vol::evacuateDocReadDone(int event, Event *e)
DDebug("cache_evac", "evacuateDocReadDone %X offset %d", (int)doc->key.slice32(0),
(int)dir_offset(&doc_evacuator->overwrite_dir));
- b = evacuate[dir_evac_bucket(&doc_evacuator->overwrite_dir)].head;
+ if (evac_bucket_valid(bucket)) {
+ b = evacuate[bucket].head;
+ }
while (b) {
if (dir_offset(&b->dir) == dir_offset(&doc_evacuator->overwrite_dir)) {
break;
@@ -928,7 +938,7 @@ agg_copy(char *p, CacheVC *vc)
inline void
Vol::evacuate_cleanup_blocks(int i)
{
- EvacuationBlock *b = evacuate[i].head;
+ EvacuationBlock *b = evac_bucket_valid(i) ? evacuate[i].head : nullptr;
while (b) {
if (b->f.done && ((header->phase != dir_phase(&b->dir) && header->write_pos > this->vol_offset(&b->dir)) ||
(header->phase == dir_phase(&b->dir) && header->write_pos <= this->vol_offset(&b->dir)))) {
diff --git a/iocore/cache/Makefile.am b/iocore/cache/Makefile.am
index 307f05bf7c4..c38c201718b 100644
--- a/iocore/cache/Makefile.am
+++ b/iocore/cache/Makefile.am
@@ -105,6 +105,7 @@ test_LDADD = \
$(top_builddir)/iocore/aio/libinkaio.a \
$(top_builddir)/src/tscore/libtscore.la \
$(top_builddir)/lib/records/librecords_p.a \
+ $(top_builddir)/lib/fastlz/libfastlz.a \
$(top_builddir)/iocore/eventsystem/libinkevent.a \
@HWLOC_LIBS@ \
@LIBPCRE@ \
@@ -116,7 +117,6 @@ test_LDADD = \
@YAMLCPP_LIBS@ \
-lm
-if EXPENSIVE_TESTS
check_PROGRAMS = \
test_Cache \
test_RWW \
@@ -129,7 +129,6 @@ check_PROGRAMS = \
test_Update_L_to_S \
test_Update_S_to_L \
test_Update_header
-endif
test_main_SOURCES = \
./test/main.cc \
diff --git a/iocore/cache/P_CacheHosting.h b/iocore/cache/P_CacheHosting.h
index e6eaa764edf..ba6fecd812d 100644
--- a/iocore/cache/P_CacheHosting.h
+++ b/iocore/cache/P_CacheHosting.h
@@ -38,7 +38,7 @@ struct CacheHostRecord {
int Init(CacheType typ);
int Init(matcher_line *line_info, CacheType typ);
- void UpdateMatch(CacheHostResult *r, char *rd);
+ void UpdateMatch(CacheHostResult *r);
void Print() const;
~CacheHostRecord()
diff --git a/iocore/cache/P_CacheInternal.h b/iocore/cache/P_CacheInternal.h
index 756eb14deef..8630d3a064f 100644
--- a/iocore/cache/P_CacheInternal.h
+++ b/iocore/cache/P_CacheInternal.h
@@ -614,9 +614,9 @@ free_CacheVC(CacheVC *cont)
cont->blocks.clear();
cont->writer_buf.clear();
cont->alternate_index = CACHE_ALT_INDEX_DEFAULT;
- if (cont->scan_vol_map) {
- ats_free(cont->scan_vol_map);
- }
+
+ ats_free(cont->scan_vol_map);
+
memset((char *)&cont->vio, 0, cont->size_to_init);
#ifdef CACHE_STAT_PAGES
ink_assert(!cont->stat_link.next && !cont->stat_link.prev);
diff --git a/iocore/cache/P_CacheTest.h b/iocore/cache/P_CacheTest.h
index 47f0cfd3bbe..40d797cdb18 100644
--- a/iocore/cache/P_CacheTest.h
+++ b/iocore/cache/P_CacheTest.h
@@ -130,7 +130,7 @@ struct CacheTestSM : public RegressionSM {
#define CACHE_SM(_t, _sm, _f) \
struct CacheTestSM__##_sm : public CacheTestSM { \
void \
- make_request_internal() _f \
+ make_request_internal() override _f \
\
CacheTestSM__##_sm(RegressionTest *t) \
: CacheTestSM(t, #_sm) \
@@ -139,7 +139,7 @@ struct CacheTestSM : public RegressionSM {
\
CacheTestSM__##_sm(const CacheTestSM__##_sm &xsm) : CacheTestSM(xsm) {} \
RegressionSM * \
- clone() \
+ clone() override \
{ \
return new CacheTestSM__##_sm(*this); \
} \
diff --git a/iocore/cache/P_CacheVol.h b/iocore/cache/P_CacheVol.h
index 95449289a53..b2f69b13ce2 100644
--- a/iocore/cache/P_CacheVol.h
+++ b/iocore/cache/P_CacheVol.h
@@ -206,6 +206,12 @@ struct Vol : public Continuation {
int dir_check(bool fix);
int db_check(bool fix);
+  /// Check that @a bucket can be used as an index into @c evacuate, i.e. lies in [0, evacuate_size).
+  /// Const because it only reads @c evacuate_size.
+  bool
+  evac_bucket_valid(off_t bucket) const
+  {
+    return (bucket >= 0 && bucket < evacuate_size);
+  }
+
int
is_io_in_progress()
{
@@ -456,10 +462,13 @@ int vol_init(Vol *d, char *s, off_t blocks, off_t skip, bool clear);
TS_INLINE EvacuationBlock *
evacuation_block_exists(Dir *dir, Vol *p)
{
- EvacuationBlock *b = p->evacuate[dir_evac_bucket(dir)].head;
- for (; b; b = b->link.next)
- if (dir_offset(&b->dir) == dir_offset(dir))
- return b;
+ auto bucket = dir_evac_bucket(dir);
+ if (p->evac_bucket_valid(bucket)) {
+ EvacuationBlock *b = p->evacuate[bucket].head;
+ for (; b; b = b->link.next)
+ if (dir_offset(&b->dir) == dir_offset(dir))
+ return b;
+ }
return nullptr;
}
diff --git a/iocore/cache/test/main.cc b/iocore/cache/test/main.cc
index 3585ab68bea..0e2379d3bde 100644
--- a/iocore/cache/test/main.cc
+++ b/iocore/cache/test/main.cc
@@ -23,10 +23,34 @@
#define CATCH_CONFIG_MAIN
#include "main.h"
+#include "tscore/ts_file.h"
+
+#include
#define THREADS 1
#define DIAGS_LOG_FILE "diags.log"
+// Create a new temp directory (under $TMPDIR, or /tmp when that is unset) and return its path.
+// The "var/trafficserver" subdirectory is created beneath it.
+std::string
+temp_prefix()
+{
+  char buffer[PATH_MAX];
+  std::error_code err;
+  const char *tmpdir = getenv("TMPDIR");
+  if (tmpdir == nullptr) {
+    tmpdir = "/tmp";
+  }
+  snprintf(buffer, sizeof(buffer), "%s/cachetest.XXXXXX", tmpdir);
+
+  // mkdtemp() returns nullptr on failure (for example an unwritable TMPDIR, or a template that was
+  // truncated by snprintf() above). Building a path from a null pointer is undefined behavior, so stop here.
+  const char *dir = mkdtemp(buffer);
+  if (dir == nullptr) {
+    Debug("cache test", "Failed to create temp directory under %s", tmpdir);
+  }
+  ink_release_assert(dir != nullptr);
+
+  auto prefix = ts::file::path(dir);
+  bool result = ts::file::create_directories(prefix / "var" / "trafficserver", err, 0755);
+  if (!result) {
+    Debug("cache test", "Failed to create directories for test: %s(%s)", prefix.c_str(), err.message().c_str());
+  }
+  ink_assert(result);
+
+  return prefix.string();
+}
+
void
test_done()
{
@@ -48,7 +72,7 @@ struct EventProcessorListener : Catch::TestEventListenerBase {
diags->show_location = SHOW_LOCATION_DEBUG;
mime_init();
- Layout::create();
+ Layout::create(temp_prefix());
RecProcessInit(RECM_STAND_ALONE);
LibRecordsConfigInit();
ink_net_init(ts::ModuleVersion(1, 0, ts::ModuleVersion::PRIVATE));
@@ -66,8 +90,6 @@ struct EventProcessorListener : Catch::TestEventListenerBase {
std::string src_dir = std::string(TS_ABS_TOP_SRCDIR) + "/iocore/cache/test";
Layout::get()->sysconfdir = src_dir;
- Layout::get()->prefix = src_dir;
- ::remove("./test/var/trafficserver/cache.db");
}
};
CATCH_REGISTER_LISTENER(EventProcessorListener);
diff --git a/iocore/cache/test/stub.cc b/iocore/cache/test/stub.cc
index dcc232c1a2c..b247da1bb39 100644
--- a/iocore/cache/test/stub.cc
+++ b/iocore/cache/test/stub.cc
@@ -21,11 +21,17 @@
limitations under the License.
*/
+#include
+
#include "HttpSessionManager.h"
#include "HttpBodyFactory.h"
#include "DiagsConfig.h"
#include "ts/InkAPIPrivateIOCore.h"
+#include "tscore/I_Version.h"
+
+AppVersionInfo appVersionInfo;
+
void
initialize_thread_for_http_sessions(EThread *, int)
{
@@ -70,6 +76,11 @@ HttpHookState::init(TSHttpHookID id, HttpAPIHooks const *global, HttpAPIHooks co
{
}
+// Stub with an intentionally empty body.
+// NOTE(review): presumably present so the cache unit tests link without the real API initialization - confirm.
+void
+api_init()
+{
+}
+
APIHook const *
HttpHookState::getNext()
{
@@ -157,18 +168,19 @@ ts::svtoi(TextView src, TextView *out, int base)
}
void
-HostStatus::setHostStatus(const char *name, TSHostStatus status, const unsigned int down_time, const unsigned int reason)
+HostStatus::setHostStatus(const std::string_view name, const TSHostStatus status, const unsigned int down_time,
+ const unsigned int reason)
{
}
HostStatRec *
-HostStatus::getHostStatus(const char *name)
+HostStatus::getHostStatus(const std::string_view name)
{
return nullptr;
}
void
-HostStatus::createHostStat(const char *name, const char *data)
+HostStatus::createHostStat(const std::string_view name, const char *data)
{
}
diff --git a/iocore/cache/test/test_RWW.cc b/iocore/cache/test/test_RWW.cc
index ee86681d58b..dab2f607195 100644
--- a/iocore/cache/test/test_RWW.cc
+++ b/iocore/cache/test/test_RWW.cc
@@ -275,7 +275,7 @@ class CacheRWWErrorTest : public CacheRWWTest
case VC_EVENT_ERROR:
case VC_EVENT_EOS:
if (this->_size == LARGE_FILE) {
- REQUIRE(base->vio->ndone >= 1 * 1024 * 1024 - sizeof(Doc));
+ REQUIRE(base->vio->ndone >= int64_t(1 * 1024 * 1024 - sizeof(Doc)));
} else {
REQUIRE(base->vio->ndone == 0);
}
diff --git a/iocore/dns/DNS.cc b/iocore/dns/DNS.cc
index e7e9ebd2bbd..9ad2ea32285 100644
--- a/iocore/dns/DNS.cc
+++ b/iocore/dns/DNS.cc
@@ -524,12 +524,12 @@ DNSHandler::open_con(sockaddr const *target, bool failed, int icon, bool over_tc
}
return false;
} else {
- ns_down[icon] = 0;
if (cur_con.eio.start(pd, &cur_con, EVENTIO_READ) < 0) {
Error("[iocore_dns] open_con: Failed to add %d server to epoll list\n", icon);
} else {
- cur_con.num = icon;
- Debug("dns", "opening connection %s SUCCEEDED for %d", ip_text, icon);
+ cur_con.num = icon;
+ ns_down[icon] = 0;
+ Debug("dns", "opening connection %s on fd %d SUCCEEDED for %d", ip_text, cur_con.fd, icon);
}
ret = true;
}
@@ -729,6 +729,15 @@ void
DNSHandler::failover()
{
Debug("dns", "failover: initiating failover attempt, current name_server=%d", name_server);
+ if (!ns_down[name_server]) {
+ ip_text_buffer buff;
+ // mark this nameserver as down
+ Debug("dns", "failover: Marking nameserver %d as down", name_server);
+ ns_down[name_server] = 1;
+ Warning("connection to DNS server %s lost, marking as down",
+ ats_ip_ntop(&m_res->nsaddr_list[name_server].sa, buff, sizeof(buff)));
+ }
+
// no hope, if we have only one server
if (m_res->nscount > 1) {
ip_text_buffer buff1, buff2;
@@ -766,6 +775,8 @@ DNSHandler::failover()
ip_text_buffer buff;
Warning("failover: connection to DNS server %s lost, retrying", ats_ip_ntop(&ip.sa, buff, sizeof(buff)));
}
+ // Make sure retries are done even if no more requests.
+ this_ethread()->schedule_in(this, DNS_PRIMARY_RETRY_PERIOD);
}
/** Mark one of the nameservers as down. */
@@ -779,6 +790,8 @@ DNSHandler::rr_failure(int ndx)
Debug("dns", "rr_failure: Marking nameserver %d as down", ndx);
ns_down[ndx] = 1;
Warning("connection to DNS server %s lost, marking as down", ats_ip_ntop(&m_res->nsaddr_list[ndx].sa, buff, sizeof(buff)));
+ // Make sure retries are done even if no more requests.
+ this_ethread()->schedule_in(this, DNS_PRIMARY_RETRY_PERIOD);
}
int nscount = m_res->nscount;
@@ -971,6 +984,11 @@ DNSHandler::check_and_reset_tcp_conn()
int
DNSHandler::mainEvent(int event, Event *e)
{
+ // If this was a scheduled retry event, clear the associated flag.
+ if (e && e->cookie == RETRY_COOKIE) {
+ this->nameserver_retry_in_flight_p = false;
+ }
+
recv_dns(event, e);
if (dns_ns_rr) {
if (DNS_CONN_MODE::TCP_RETRY == dns_conn_mode) {
@@ -1017,10 +1035,6 @@ DNSHandler::mainEvent(int event, Event *e)
write_dns(this);
}
- if (std::any_of(ns_down, ns_down + n_con, [](int f) { return f != 0; })) {
- this_ethread()->schedule_at(this, DNS_PRIMARY_RETRY_PERIOD);
- }
-
return EVENT_CONT;
}
diff --git a/iocore/dns/P_DNSProcessor.h b/iocore/dns/P_DNSProcessor.h
index 4c16f32f993..7d0bba98d2c 100644
--- a/iocore/dns/P_DNSProcessor.h
+++ b/iocore/dns/P_DNSProcessor.h
@@ -169,9 +169,16 @@ struct DNSHandler : public Continuation {
DNSConnection udpcon[MAX_NAMED];
Queue entries;
Queue triggered;
- int in_flight = 0;
- int name_server = 0;
- int in_write_dns = 0;
+ int in_flight = 0;
+ int name_server = 0;
+ int in_write_dns = 0;
+ /// Rate limiter for down nameserver retries.
+ /// Don't schedule another if there is already one in flight.
+ std::atomic nameserver_retry_in_flight_p{false};
+ /// Marker for event cookie to indicate it's a nameserver retry event.
+ /// @note Can't be @c constexpr because of the cast.
+ static inline void *const RETRY_COOKIE{reinterpret_cast(0x2)};
+
HostEnt *hostent_cache = nullptr;
int ns_down[MAX_NAMED];
@@ -212,16 +219,16 @@ struct DNSHandler : public Continuation {
(ink_hrtime)HRTIME_SECONDS(dns_failover_period));
Debug("dns", "\tdelta time is %" PRId64 "", (Thread::get_hrtime() - crossed_failover_number[i]));
}
- return (crossed_failover_number[i] &&
- ((Thread::get_hrtime() - crossed_failover_number[i]) > HRTIME_SECONDS(dns_failover_period)));
+ return ns_down[i] || (crossed_failover_number[i] &&
+ ((Thread::get_hrtime() - crossed_failover_number[i]) > HRTIME_SECONDS(dns_failover_period)));
}
bool
failover_soon(int i)
{
- return (crossed_failover_number[i] &&
- ((Thread::get_hrtime() - crossed_failover_number[i]) >
- (HRTIME_SECONDS(dns_failover_try_period + failover_soon_number[i] * FAILOVER_SOON_RETRY))));
+ return ns_down[i] || (crossed_failover_number[i] &&
+ ((Thread::get_hrtime() - crossed_failover_number[i]) >
+ (HRTIME_SECONDS(dns_failover_try_period + failover_soon_number[i] * FAILOVER_SOON_RETRY))));
}
void recv_dns(int event, Event *e);
diff --git a/iocore/dns/P_SplitDNSProcessor.h b/iocore/dns/P_SplitDNSProcessor.h
index 5fa119e5f5a..7424ccfdc7b 100644
--- a/iocore/dns/P_SplitDNSProcessor.h
+++ b/iocore/dns/P_SplitDNSProcessor.h
@@ -79,7 +79,7 @@ struct SplitDNS : public ConfigInfo {
SplitDNS();
~SplitDNS() override;
- void *getDNSRecord(const char *hostname);
+ void *getDNSRecord(ts::TextView hostname);
void findServer(RequestData *rdata, SplitDNSResult *result);
DNS_table *m_DNSSrvrTable = nullptr;
@@ -116,46 +116,34 @@ SplitDNSConfig::isSplitDNSEnabled()
class DNSRequestData : public RequestData
{
public:
- DNSRequestData();
-
- char *get_string() override;
+ DNSRequestData() = default;
+ char *
+ get_string() override
+ {
+ ink_release_assert(!"Do not get a writeable string from a DNS request");
+ };
const char *get_host() override;
sockaddr const *get_ip() override; // unused required virtual method.
sockaddr const *get_client_ip() override; // unused required virtual method.
- const char *m_pHost = nullptr;
+ ts::TextView m_pHost;
};
-/* --------------------------------------------------------------
- DNSRequestData::get_string()
- -------------------------------------------------------------- */
-TS_INLINE
-DNSRequestData::DNSRequestData() {}
-
-/* --------------------------------------------------------------
- DNSRequestData::get_string()
- -------------------------------------------------------------- */
-TS_INLINE char *
-DNSRequestData::get_string()
-{
- return ats_strdup((char *)m_pHost);
-}
-
/* --------------------------------------------------------------
DNSRequestData::get_host()
-------------------------------------------------------------- */
-TS_INLINE const char *
+inline const char *
DNSRequestData::get_host()
{
- return m_pHost;
+ return m_pHost.data();
}
/* --------------------------------------------------------------
DNSRequestData::get_ip()
-------------------------------------------------------------- */
-TS_INLINE sockaddr const *
+inline sockaddr const *
DNSRequestData::get_ip()
{
return nullptr;
@@ -164,7 +152,7 @@ DNSRequestData::get_ip()
/* --------------------------------------------------------------
DNSRequestData::get_client_ip()
-------------------------------------------------------------- */
-TS_INLINE sockaddr const *
+inline sockaddr const *
DNSRequestData::get_client_ip()
{
return nullptr;
diff --git a/iocore/dns/SRV.h b/iocore/dns/SRV.h
index ff75689e74e..560223636e4 100644
--- a/iocore/dns/SRV.h
+++ b/iocore/dns/SRV.h
@@ -25,7 +25,6 @@
#include
#include "tscore/ink_platform.h"
-#include "I_HostDBProcessor.h"
struct HostDBInfo;
diff --git a/iocore/dns/SplitDNS.cc b/iocore/dns/SplitDNS.cc
index 802e97981cd..b993f1e7f51 100644
--- a/iocore/dns/SplitDNS.cc
+++ b/iocore/dns/SplitDNS.cc
@@ -178,9 +178,9 @@ SplitDNSConfig::print()
SplitDNS::getDNSRecord()
-------------------------------------------------------------- */
void *
-SplitDNS::getDNSRecord(const char *hostname)
+SplitDNS::getDNSRecord(ts::TextView hostname)
{
- Debug("splitdns", "Called SplitDNS::getDNSRecord(%s)", hostname);
+ Debug("splitdns", "Called SplitDNS::getDNSRecord(%.*s)", int(hostname.size()), hostname.data());
DNSRequestData *pRD = DNSReqAllocator.alloc();
pRD->m_pHost = hostname;
@@ -191,7 +191,7 @@ SplitDNS::getDNSRecord(const char *hostname)
DNSReqAllocator.free(pRD);
if (DNS_SRVR_SPECIFIED == res.r) {
- return (void *)&(res.m_rec->m_servers);
+ return &(res.m_rec->m_servers);
}
Debug("splitdns", "Fail to match a valid splitdns rule, fallback to default dns resolver");
diff --git a/iocore/eventsystem/I_Event.h b/iocore/eventsystem/I_Event.h
index 022f0adc7ec..10aad0ae518 100644
--- a/iocore/eventsystem/I_Event.h
+++ b/iocore/eventsystem/I_Event.h
@@ -200,6 +200,11 @@ class Event : public Action
// inherited from Action::cancel
// virtual void cancel(Continuation * c = nullptr);
+#ifdef ENABLE_EVENT_TRACKER
+ void set_location();
+ const void *get_location() const;
+#endif
+
void free();
EThread *ethread = nullptr;
@@ -239,6 +244,14 @@ class Event : public Action
private:
void *operator new(size_t size); // use the fast allocators
+#ifdef ENABLE_EVENT_TRACKER
+ /**
+ Address of the code location that scheduled this event.
+ To get symbols, use backtrace_symbols(3) or external tools like `addr2line(1)` (Linux) or `atos(1)`(BSD).
+ */
+ const void *_location = nullptr;
+#endif
+
public:
LINK(Event, link);
diff --git a/iocore/eventsystem/I_ProxyAllocator.h b/iocore/eventsystem/I_ProxyAllocator.h
index 613b08f0638..cfd16075c1e 100644
--- a/iocore/eventsystem/I_ProxyAllocator.h
+++ b/iocore/eventsystem/I_ProxyAllocator.h
@@ -83,10 +83,11 @@ void thread_freeup(Allocator &a, ProxyAllocator &l);
#endif
-#define THREAD_FREE(_p, _a, _t) \
+#define THREAD_FREE(_p, _a, _tin) \
do { \
::_a.destroy_if_enabled(_p); \
if (!cmd_disable_pfreelist) { \
+ Thread *_t = (_tin); \
*(char **)_p = (char *)_t->_a.freelist; \
_t->_a.freelist = _p; \
_t->_a.allocated++; \
diff --git a/iocore/eventsystem/Makefile.am b/iocore/eventsystem/Makefile.am
index 3421eb40167..dbffb1098b5 100644
--- a/iocore/eventsystem/Makefile.am
+++ b/iocore/eventsystem/Makefile.am
@@ -72,7 +72,8 @@ libinkevent_a_SOURCES = \
check_PROGRAMS = test_IOBuffer \
test_EventSystem \
- test_MIOBufferWriter
+ test_MIOBufferWriter \
+ benchmark_ProxyAllocator
test_LD_FLAGS = \
@AM_LDFLAGS@ \
@@ -114,6 +115,11 @@ test_MIOBufferWriter_SOURCES = unit_tests/test_MIOBufferWriter.cc
test_MIOBufferWriter_CPPFLAGS = $(test_CPP_FLAGS)
test_MIOBufferWriter_LDFLAGS = $(test_LD_FLAGS)
+benchmark_ProxyAllocator_SOURCES = unit_tests/benchmark_ProxyAllocator.cc
+benchmark_ProxyAllocator_CPPFLAGS = $(test_CPP_FLAGS)
+benchmark_ProxyAllocator_LDFLAGS = $(test_LD_FLAGS)
+benchmark_ProxyAllocator_LDADD = $(test_LD_ADD)
+
include $(top_srcdir)/build/tidy.mk
clang-tidy-local: $(DIST_SOURCES)
diff --git a/iocore/eventsystem/P_UnixEThread.h b/iocore/eventsystem/P_UnixEThread.h
index e4e8cb8e832..50d1885bcbc 100644
--- a/iocore/eventsystem/P_UnixEThread.h
+++ b/iocore/eventsystem/P_UnixEThread.h
@@ -32,6 +32,7 @@
#include "I_EThread.h"
#include "I_EventProcessor.h"
+#include
const ink_hrtime DELAY_FOR_RETRY = HRTIME_MSECONDS(10);
extern ink_thread_key ethread_key;
@@ -39,7 +40,12 @@ extern ink_thread_key ethread_key;
TS_INLINE Event *
EThread::schedule_imm(Continuation *cont, int callback_event, void *cookie)
{
- Event *e = ::eventAllocator.alloc();
+ Event *e = ::eventAllocator.alloc();
+
+#ifdef ENABLE_EVENT_TRACKER
+ e->set_location();
+#endif
+
e->callback_event = callback_event;
e->cookie = cookie;
return schedule(e->init(cont, 0, 0));
@@ -48,7 +54,12 @@ EThread::schedule_imm(Continuation *cont, int callback_event, void *cookie)
TS_INLINE Event *
EThread::schedule_at(Continuation *cont, ink_hrtime t, int callback_event, void *cookie)
{
- Event *e = ::eventAllocator.alloc();
+ Event *e = ::eventAllocator.alloc();
+
+#ifdef ENABLE_EVENT_TRACKER
+ e->set_location();
+#endif
+
e->callback_event = callback_event;
e->cookie = cookie;
return schedule(e->init(cont, t, 0));
@@ -57,7 +68,12 @@ EThread::schedule_at(Continuation *cont, ink_hrtime t, int callback_event, void
TS_INLINE Event *
EThread::schedule_in(Continuation *cont, ink_hrtime t, int callback_event, void *cookie)
{
- Event *e = ::eventAllocator.alloc();
+ Event *e = ::eventAllocator.alloc();
+
+#ifdef ENABLE_EVENT_TRACKER
+ e->set_location();
+#endif
+
e->callback_event = callback_event;
e->cookie = cookie;
return schedule(e->init(cont, get_hrtime() + t, 0));
@@ -66,7 +82,12 @@ EThread::schedule_in(Continuation *cont, ink_hrtime t, int callback_event, void
TS_INLINE Event *
EThread::schedule_every(Continuation *cont, ink_hrtime t, int callback_event, void *cookie)
{
- Event *e = ::eventAllocator.alloc();
+ Event *e = ::eventAllocator.alloc();
+
+#ifdef ENABLE_EVENT_TRACKER
+ e->set_location();
+#endif
+
e->callback_event = callback_event;
e->cookie = cookie;
if (t < 0) {
@@ -108,7 +129,12 @@ EThread::schedule(Event *e)
TS_INLINE Event *
EThread::schedule_imm_local(Continuation *cont, int callback_event, void *cookie)
{
- Event *e = EVENT_ALLOC(eventAllocator, this);
+ Event *e = EVENT_ALLOC(eventAllocator, this);
+
+#ifdef ENABLE_EVENT_TRACKER
+ e->set_location();
+#endif
+
e->callback_event = callback_event;
e->cookie = cookie;
return schedule_local(e->init(cont, 0, 0));
@@ -117,7 +143,12 @@ EThread::schedule_imm_local(Continuation *cont, int callback_event, void *cookie
TS_INLINE Event *
EThread::schedule_at_local(Continuation *cont, ink_hrtime t, int callback_event, void *cookie)
{
- Event *e = EVENT_ALLOC(eventAllocator, this);
+ Event *e = EVENT_ALLOC(eventAllocator, this);
+
+#ifdef ENABLE_EVENT_TRACKER
+ e->set_location();
+#endif
+
e->callback_event = callback_event;
e->cookie = cookie;
return schedule_local(e->init(cont, t, 0));
@@ -126,7 +157,12 @@ EThread::schedule_at_local(Continuation *cont, ink_hrtime t, int callback_event,
TS_INLINE Event *
EThread::schedule_in_local(Continuation *cont, ink_hrtime t, int callback_event, void *cookie)
{
- Event *e = EVENT_ALLOC(eventAllocator, this);
+ Event *e = EVENT_ALLOC(eventAllocator, this);
+
+#ifdef ENABLE_EVENT_TRACKER
+ e->set_location();
+#endif
+
e->callback_event = callback_event;
e->cookie = cookie;
return schedule_local(e->init(cont, get_hrtime() + t, 0));
@@ -135,7 +171,12 @@ EThread::schedule_in_local(Continuation *cont, ink_hrtime t, int callback_event,
TS_INLINE Event *
EThread::schedule_every_local(Continuation *cont, ink_hrtime t, int callback_event, void *cookie)
{
- Event *e = EVENT_ALLOC(eventAllocator, this);
+ Event *e = EVENT_ALLOC(eventAllocator, this);
+
+#ifdef ENABLE_EVENT_TRACKER
+ e->set_location();
+#endif
+
e->callback_event = callback_event;
e->cookie = cookie;
if (t < 0) {
diff --git a/iocore/eventsystem/P_UnixEventProcessor.h b/iocore/eventsystem/P_UnixEventProcessor.h
index a8ba4f475a1..9feec535b93 100644
--- a/iocore/eventsystem/P_UnixEventProcessor.h
+++ b/iocore/eventsystem/P_UnixEventProcessor.h
@@ -135,6 +135,11 @@ EventProcessor::schedule_imm(Continuation *cont, EventType et, int callback_even
#ifdef ENABLE_TIME_TRACE
e->start_time = Thread::get_hrtime();
#endif
+
+#ifdef ENABLE_EVENT_TRACKER
+ e->set_location();
+#endif
+
e->callback_event = callback_event;
e->cookie = cookie;
return schedule(e->init(cont, 0, 0), et);
@@ -147,6 +152,11 @@ EventProcessor::schedule_at(Continuation *cont, ink_hrtime t, EventType et, int
ink_assert(t > 0);
ink_assert(et < MAX_EVENT_TYPES);
+
+#ifdef ENABLE_EVENT_TRACKER
+ e->set_location();
+#endif
+
e->callback_event = callback_event;
e->cookie = cookie;
return schedule(e->init(cont, t, 0), et);
@@ -158,6 +168,11 @@ EventProcessor::schedule_in(Continuation *cont, ink_hrtime t, EventType et, int
Event *e = eventAllocator.alloc();
ink_assert(et < MAX_EVENT_TYPES);
+
+#ifdef ENABLE_EVENT_TRACKER
+ e->set_location();
+#endif
+
e->callback_event = callback_event;
e->cookie = cookie;
return schedule(e->init(cont, Thread::get_hrtime() + t, 0), et);
@@ -170,6 +185,11 @@ EventProcessor::schedule_every(Continuation *cont, ink_hrtime t, EventType et, i
ink_assert(t != 0);
ink_assert(et < MAX_EVENT_TYPES);
+
+#ifdef ENABLE_EVENT_TRACKER
+ e->set_location();
+#endif
+
e->callback_event = callback_event;
e->cookie = cookie;
if (t < 0) {
diff --git a/iocore/eventsystem/UnixEvent.cc b/iocore/eventsystem/UnixEvent.cc
index b5589aedd10..983cf7a2632 100644
--- a/iocore/eventsystem/UnixEvent.cc
+++ b/iocore/eventsystem/UnixEvent.cc
@@ -29,6 +29,8 @@
*****************************************************************************/
#include "P_EventSystem.h"
+#include "tscore/ink_stack_trace.h"
+
ClassAllocator eventAllocator("eventAllocator", 256);
void
@@ -104,3 +106,19 @@ Event::schedule_every(ink_hrtime aperiod, int acallback_event)
ethread->EventQueueExternal.enqueue_local(this);
}
}
+
+#ifdef ENABLE_EVENT_TRACKER
+
+void
+Event::set_location()
+{
+ _location = ink_backtrace(3);
+}
+
+const void *
+Event::get_location() const
+{
+ return _location;
+}
+
+#endif
diff --git a/iocore/eventsystem/UnixEventProcessor.cc b/iocore/eventsystem/UnixEventProcessor.cc
index c3a30fe494c..ec860943d42 100644
--- a/iocore/eventsystem/UnixEventProcessor.cc
+++ b/iocore/eventsystem/UnixEventProcessor.cc
@@ -30,6 +30,7 @@
#include
#endif
#include "tscore/ink_defs.h"
+#include "tscore/ink_hw.h"
#include "tscore/hugepages.h"
/// Global singleton.
diff --git a/iocore/eventsystem/unit_tests/benchmark_ProxyAllocator.cc b/iocore/eventsystem/unit_tests/benchmark_ProxyAllocator.cc
new file mode 100644
index 00000000000..660ddcc3c71
--- /dev/null
+++ b/iocore/eventsystem/unit_tests/benchmark_ProxyAllocator.cc
@@ -0,0 +1,112 @@
+/** @file
+
+Simple benchmark for ProxyAllocator
+
+@section license License
+
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#define CATCH_CONFIG_ENABLE_BENCHMARKING
+#define CATCH_CONFIG_MAIN
+#include "catch.hpp"
+
+#include "I_EventSystem.h"
+#include "I_Thread.h"
+#include "tscore/Allocator.h"
+
+namespace
+{
+class BThread : public Thread
+{
+public:
+ void
+ set_specific() override
+ {
+ Thread::set_specific();
+ }
+
+ void
+ execute() override
+ {
+ }
+};
+
+struct BItem {
+ char buffer[128];
+};
+
+} // namespace
+
+// THREAD_ALLOC/FREE require that allocators be global variables named after one of the defined ProxyAllocator members.
+ClassAllocator ioAllocator("io");
+
+#define OLD_THREAD_FREE(_p, _a, _t) \
+ do { \
+ ::_a.destroy_if_enabled(_p); \
+ if (!cmd_disable_pfreelist) { \
+ *(char **)_p = (char *)_t->_a.freelist; \
+ _t->_a.freelist = _p; \
+ _t->_a.allocated++; \
+ if (thread_freelist_high_watermark > 0 && _t->_a.allocated > thread_freelist_high_watermark) \
+ thread_freeup(::_a.raw(), _t->_a); \
+ } else { \
+ ::_a.raw().free_void(_p); \
+ } \
+ } while (0)
+
+TEST_CASE("ProxyAllocator", "[iocore]")
+{
+ Thread *bench_thread = new BThread();
+ bench_thread->set_specific();
+ int count = 10000;
+
+ // set higher than iteration count so the freeup doesn't run during benchmark
+ thread_freelist_high_watermark = count + 1;
+
+ BENCHMARK("thread_free old")
+ {
+ auto items = std::vector();
+ items.reserve(count);
+ for (int i = 0; i < count; i++) {
+ auto *item = THREAD_ALLOC(ioAllocator, this_thread());
+ items.push_back(item);
+ }
+
+ for (auto item : items) {
+ OLD_THREAD_FREE(item, ioAllocator, this_thread());
+ }
+ return bench_thread->ioAllocator.allocated;
+ };
+
+ BENCHMARK("thread_free new")
+ {
+ auto items = std::vector();
+ items.reserve(count);
+ for (int i = 0; i < count; i++) {
+ auto *item = THREAD_ALLOC(ioAllocator, this_thread());
+ items.push_back(item);
+ }
+
+ for (auto item : items) {
+ THREAD_FREE(item, ioAllocator, this_thread());
+ }
+ return bench_thread->ioAllocator.allocated;
+ };
+
+ delete bench_thread;
+}
diff --git a/iocore/hostdb/HostDB.cc b/iocore/hostdb/HostDB.cc
index 89968f6ca03..9a1ab5cce30 100644
--- a/iocore/hostdb/HostDB.cc
+++ b/iocore/hostdb/HostDB.cc
@@ -26,14 +26,18 @@
#include "P_RefCountCacheSerializer.h"
#include "tscore/I_Layout.h"
#include "Show.h"
-#include "tscore/Tokenizer.h"
+#include "tscore/ts_file.h"
#include "tscore/ink_apidefs.h"
+#include "tscore/bwf_std_format.h"
#include
#include
#include
#include
#include
+#include
+
+using ts::TextView;
HostDBProcessor hostDBProcessor;
int HostDBProcessor::hostdb_strict_round_robin = 0;
@@ -50,78 +54,215 @@ unsigned int hostdb_ip_stale_interval = HOST_DB_IP_STALE;
unsigned int hostdb_ip_timeout_interval = HOST_DB_IP_TIMEOUT;
unsigned int hostdb_ip_fail_timeout_interval = HOST_DB_IP_FAIL_TIMEOUT;
unsigned int hostdb_serve_stale_but_revalidate = 0;
-unsigned int hostdb_hostfile_check_interval = 86400; // 1 day
+ts_seconds hostdb_hostfile_check_interval{std::chrono::hours(24)};
// Epoch timestamp of the current hosts file check.
-ink_time_t hostdb_current_interval = 0;
+ts_time hostdb_current_interval{TS_TIME_ZERO};
// Epoch timestamp of the last time we actually checked for a hosts file update.
-static ink_time_t hostdb_last_interval = 0;
+static ts_time hostdb_last_interval{TS_TIME_ZERO};
// Epoch timestamp when we updated the hosts file last.
-static ink_time_t hostdb_hostfile_update_timestamp = 0;
-static char hostdb_filename[PATH_NAME_MAX] = DEFAULT_HOST_DB_FILENAME;
-int hostdb_max_count = DEFAULT_HOST_DB_SIZE;
-char hostdb_hostfile_path[PATH_NAME_MAX] = "";
-int hostdb_sync_frequency = 0;
-int hostdb_disable_reverse_lookup = 0;
-int hostdb_max_iobuf_index = BUFFER_SIZE_INDEX_32K;
-
-// Verify the generic storage is sufficient to cover all alternate members.
-static_assert(sizeof(HostDBApplicationInfo::allotment) == sizeof(HostDBApplicationInfo),
- "Generic storage for HostDBApplicationInfo is smaller than the union storage.");
+static ts_time hostdb_hostfile_update_timestamp{TS_TIME_ZERO};
+static char hostdb_filename[PATH_NAME_MAX] = DEFAULT_HOST_DB_FILENAME;
+int hostdb_max_count = DEFAULT_HOST_DB_SIZE;
+static ts::file::path hostdb_hostfile_path;
+ts_seconds hostdb_sync_frequency{0};
+int hostdb_disable_reverse_lookup = 0;
+int hostdb_max_iobuf_index = BUFFER_SIZE_INDEX_32K;
ClassAllocator hostDBContAllocator("hostDBContAllocator");
+namespace
+{
+/** Assign raw storage to an @c IpAddr
+ *
+ * @param ip Destination.
+ * @param af IP family.
+ * @param ptr Raw data for an address of family @a af.
+ */
+void
+ip_addr_set(IpAddr &ip, ///< Target storage.
+ uint8_t af, ///< Address format.
+ void const *ptr ///< Raw address data
+)
+{
+ if (AF_INET6 == af) {
+ ip = *static_cast(ptr);
+ } else if (AF_INET == af) {
+ ip = *static_cast(ptr);
+ } else {
+ ip.invalidate();
+ }
+}
+
+unsigned int
+HOSTDB_CLIENT_IP_HASH(sockaddr const *lhs, IpAddr const &rhs)
+{
+ unsigned int zret = ~static_cast(0);
+ if (lhs->sa_family == rhs.family()) {
+ if (rhs.isIp4()) {
+ in_addr_t ip1 = ats_ip4_addr_cast(lhs);
+ in_addr_t ip2 = rhs._addr._ip4;
+ zret = (ip1 >> 16) ^ ip1 ^ ip2 ^ (ip2 >> 16);
+ } else if (rhs.isIp6()) {
+ uint32_t const *ip1 = ats_ip_addr32_cast(lhs);
+ uint32_t const *ip2 = rhs._addr._u32;
+ for (int i = 0; i < 4; ++i, ++ip1, ++ip2) {
+ zret ^= (*ip1 >> 16) ^ *ip1 ^ *ip2 ^ (*ip2 >> 16);
+ }
+ }
+ }
+ return zret & 0xFFFF;
+}
+
+} // namespace
+
+char const *
+name_of(HostDBType t)
+{
+ switch (t) {
+ case HostDBType::UNSPEC:
+ return "*";
+ case HostDBType::ADDR:
+ return "Address";
+ case HostDBType::SRV:
+ return "SRV";
+ case HostDBType::HOST:
+ return "Reverse DNS";
+ }
+ return "";
+}
+
+/** Template for creating conversions and initialization for @c std::chrono based configuration variables.
+ *
+ * @tparam V The exact type of the configuration variable.
+ *
+ * The tricky template code is to enable having a class instance for each configuration variable, instead of for each _type_ of
+ * configuration variable. This is required because the callback interface requires functions and so the actual storage must be
+ * accessible from that function.
+ */
+template struct ConfigDuration {
+ using self_type = ConfigDuration;
+ V *_var; ///< Pointer to the variable to control.
+
+ /** Constructor.
+ *
+ * @param v The variable to update.
+ */
+ ConfigDuration(V &v) : _var(&v) {}
+
+ /// Convert to the mgmt (configuration) type.
+ static MgmtInt
+ to_mgmt(void const *data)
+ {
+ return static_cast(static_cast(data)->count());
+ }
+
+ /// Convert from the mgmt (configuration) type.
+ static void
+ from_mgmt(void *data, MgmtInt i)
+ {
+ *static_cast(data) = V{i};
+ }
+
+ /// The conversion structure, which handles @c MgmtInt.
+ static inline const MgmtConverter Conversions{&to_mgmt, &from_mgmt};
+
+ /** Process start up conversion from configuration.
+ *
+ * @param type The data type in the configuration.
+ * @param data The data in the configuration.
+ * @param var Pointer to the variable to update.
+ * @return @c true if @a data was successfully converted and stored, @c false if not.
+ *
+ * @note @a var is the target variable because it was explicitly set to be the value of @a _var in @c Enable.
+ */
+ static bool
+ callback(char const *, RecDataT type, RecData data, void *var)
+ {
+ if (RECD_INT == type) {
+ (*self_type::Conversions.store_int)(var, data.rec_int);
+ return true;
+ }
+ return false;
+ }
+
+ /** Enable.
+ *
+ * @param name Name of the configuration variable.
+ *
+ * This enables both reading from the configuration and handling the callback for dynamic
+ * updates of the variable.
+ */
+ void
+ Enable(std::string_view name)
+ {
+ Enable_Config_Var(name, &self_type::callback, _var);
+ }
+};
+
+ConfigDuration HostDBDownServerCacheTimeVar{HttpConfig::m_master.oride.down_server_timeout};
+// Make the conversions visible to the plugin API. This allows exporting just the conversions
+// without having to export the class definition. Again, the compiler doesn't allow doing this
+// in one line.
+extern MgmtConverter const &HostDBDownServerCacheTimeConv;
+MgmtConverter const &HostDBDownServerCacheTimeConv = HostDBDownServerCacheTimeVar.Conversions;
+
+// Not run time configurable, therefore no support beyond this class needed.
+ConfigDuration HostDBSyncFrequencyVar{hostdb_sync_frequency};
+
+void
+HostDB_Config_Init()
+{
+ HostDBDownServerCacheTimeVar.Enable("proxy.config.http.down_server.cache_time");
+ HostDBSyncFrequencyVar.Enable("proxy.config.cache.hostdb.sync_frequency");
+}
+
// Static configuration information
HostDBCache hostDB;
-void ParseHostFile(const char *path, unsigned int interval);
+void ParseHostFile(ts::file::path const &path, ts_seconds interval);
-char *
-HostDBInfo::srvname(HostDBRoundRobin *rr) const
+auto
+HostDBInfo::assign(sa_family_t af, void const *addr) -> self_type &
{
- if (!is_srv || !data.srv.srv_offset) {
- return nullptr;
- }
- return reinterpret_cast(rr) + data.srv.srv_offset;
+ type = HostDBType::ADDR;
+ ip_addr_set(data.ip, af, addr);
+ return *this;
}
-static inline bool
-is_addr_valid(uint8_t af, ///< Address family (format of data)
- void *ptr ///< Raw address data (not a sockaddr variant!)
-)
+auto
+HostDBInfo::assign(IpAddr const &addr) -> self_type &
{
- return (AF_INET == af && INADDR_ANY != *(reinterpret_cast(ptr))) ||
- (AF_INET6 == af && !IN6_IS_ADDR_UNSPECIFIED(reinterpret_cast(ptr)));
+ type = HostDBType::ADDR;
+ data.ip = addr;
+ return *this;
}
-static inline void
-ip_addr_set(sockaddr *ip, ///< Target storage, sockaddr compliant.
- uint8_t af, ///< Address format.
- void *ptr ///< Raw address data
-)
+auto
+HostDBInfo::assign(SRV const *srv, char const *name) -> self_type &
{
- if (AF_INET6 == af) {
- ats_ip6_set(ip, *static_cast(ptr));
- } else if (AF_INET == af) {
- ats_ip4_set(ip, *static_cast(ptr));
- } else {
- ats_ip_invalidate(ip);
- }
+ type = HostDBType::SRV;
+ data.srv.srv_weight = srv->weight;
+ data.srv.srv_priority = srv->priority;
+ data.srv.srv_port = srv->port;
+ data.srv.key = srv->key;
+ data.srv.srv_offset = name - reinterpret_cast<char const *>(this); // byte offset from this to name; srvname() adds it back to this
+ return *this;
+}
+
+char const *
+HostDBInfo::srvname() const
+{
+ return data.srv.srv_offset ? reinterpret_cast(this) + data.srv.srv_offset : nullptr;
}
-static inline void
-ip_addr_set(IpAddr &ip, ///< Target storage.
- uint8_t af, ///< Address format.
- void *ptr ///< Raw address data
+static inline bool
+is_addr_valid(uint8_t af, ///< Address family (format of data)
+ void *ptr ///< Raw address data (not a sockaddr variant!)
)
{
- if (AF_INET6 == af) {
- ip = *static_cast(ptr);
- } else if (AF_INET == af) {
- ip = *static_cast(ptr);
- } else {
- ip.invalidate();
- }
+ return (AF_INET == af && INADDR_ANY != *(reinterpret_cast(ptr))) ||
+ (AF_INET6 == af && !IN6_IS_ADDR_UNSPECIFIED(reinterpret_cast(ptr)));
}
inline void
@@ -169,18 +310,12 @@ string_for(HostDBMark mark)
static Action *register_ShowHostDB(Continuation *c, HTTPHdr *h);
HostDBHash &
-HostDBHash::set_host(const char *name, int len)
+HostDBHash::set_host(TextView name)
{
host_name = name;
- host_len = len;
- if (host_name && SplitDNSConfig::isSplitDNSEnabled()) {
- const char *scan;
- // I think this is checking for a hostname that is just an address.
- for (scan = host_name; *scan != '\0' && (ParseRules::is_digit(*scan) || '.' == *scan || ':' == *scan); ++scan) {
- ;
- }
- if ('\0' != *scan) {
+ if (!host_name.empty() && SplitDNSConfig::isSplitDNSEnabled()) {
+ if (TS_SUCCESS != ip.load(host_name)) {
// config is released in the destructor, because we must make sure values we
// get out of it don't evaporate while @a this is still around.
if (!pSD) {
@@ -206,7 +341,7 @@ HostDBHash::refresh()
const char *server_line = dns_server ? dns_server->x_dns_ip_line : nullptr;
uint8_t m = static_cast(db_mark); // be sure of the type.
- ctx.update(host_name, host_len);
+ ctx.update(host_name.data(), host_name.size());
ctx.update(reinterpret_cast(&port), sizeof(port));
ctx.update(&m, sizeof(m));
if (server_line) {
@@ -235,10 +370,7 @@ HostDBHash::~HostDBHash()
}
}
-HostDBCache::HostDBCache()
-{
- hosts_file_ptr = new RefCountedHostsFileMap();
-}
+HostDBCache::HostDBCache() {}
bool
HostDBCache::is_pending_dns_for_hash(const CryptoHash &hash)
@@ -252,6 +384,14 @@ HostDBCache::is_pending_dns_for_hash(const CryptoHash &hash)
return false;
}
+std::shared_ptr
+HostDBCache::acquire_host_file()
+{
+ std::shared_lock lock(host_file_mutex);
+ auto zret = host_file;
+ return zret;
+}
+
HostDBCache *
HostDBProcessor::cache()
{
@@ -259,16 +399,16 @@ HostDBProcessor::cache()
}
struct HostDBBackgroundTask : public Continuation {
- int frequency;
- ink_hrtime start_time;
+ ts_seconds frequency;
+ ts_hr_time start_time;
virtual int sync_event(int event, void *edata) = 0;
int wait_event(int event, void *edata);
- HostDBBackgroundTask(int frequency);
+ HostDBBackgroundTask(ts_seconds frequency);
};
-HostDBBackgroundTask::HostDBBackgroundTask(int frequency) : Continuation(new_ProxyMutex()), frequency(frequency), start_time(0)
+HostDBBackgroundTask::HostDBBackgroundTask(ts_seconds frequency) : Continuation(new_ProxyMutex()), frequency(frequency)
{
SET_HANDLER(&HostDBBackgroundTask::sync_event);
}
@@ -276,11 +416,11 @@ HostDBBackgroundTask::HostDBBackgroundTask(int frequency) : Continuation(new_Pro
int
HostDBBackgroundTask::wait_event(int, void *)
{
- ink_hrtime next_sync = HRTIME_SECONDS(this->frequency) - (Thread::get_hrtime() - start_time);
+ auto next_sync = this->frequency - (ts_hr_clock::now() - start_time);
SET_HANDLER(&HostDBBackgroundTask::sync_event);
- if (next_sync > HRTIME_MSECONDS(100)) {
- eventProcessor.schedule_in(this, next_sync, ET_TASK);
+ if (next_sync > ts_milliseconds{100}) {
+ eventProcessor.schedule_in(this, std::chrono::duration_cast(next_sync).count(), ET_TASK);
} else {
eventProcessor.schedule_imm(this, ET_TASK);
}
@@ -290,16 +430,16 @@ HostDBBackgroundTask::wait_event(int, void *)
struct HostDBSync : public HostDBBackgroundTask {
std::string storage_path;
std::string full_path;
- HostDBSync(int frequency, const std::string &storage_path, const std::string &full_path)
+ HostDBSync(ts_seconds frequency, const std::string &storage_path, const std::string &full_path)
: HostDBBackgroundTask(frequency), storage_path(std::move(storage_path)), full_path(std::move(full_path)){};
int
sync_event(int, void *) override
{
SET_HANDLER(&HostDBSync::wait_event);
- start_time = Thread::get_hrtime();
+ start_time = ts_hr_clock::now();
- new RefCountCacheSerializer(this, hostDBProcessor.cache()->refcountcache, this->frequency, this->storage_path,
- this->full_path);
+ new RefCountCacheSerializer(this, hostDBProcessor.cache()->refcountcache, this->frequency.count(),
+ this->storage_path, this->full_path);
return EVENT_DONE;
}
};
@@ -327,8 +467,6 @@ HostDBCache::start(int flags)
REC_ReadConfigInteger(hostdb_max_size, "proxy.config.hostdb.max_size");
// number of partitions
REC_ReadConfigInt32(hostdb_partitions, "proxy.config.hostdb.partitions");
- // how often to sync hostdb to disk
- REC_EstablishStaticConfigInt32(hostdb_sync_frequency, "proxy.config.cache.hostdb.sync_frequency");
REC_EstablishStaticConfigInt32(hostdb_max_iobuf_index, "proxy.config.hostdb.io.max_buffer_index");
@@ -337,13 +475,13 @@ HostDBCache::start(int flags)
}
// Setup the ref-counted cache (this must be done regardless of syncing or not).
- this->refcountcache = new RefCountCache(hostdb_partitions, hostdb_max_size, hostdb_max_count, HostDBInfo::version(),
- "proxy.process.hostdb.cache.");
+ this->refcountcache = new RefCountCache(hostdb_partitions, hostdb_max_size, hostdb_max_count, HostDBRecord::Version,
+ "proxy.process.hostdb.cache.");
//
// Load and sync HostDB, if we've asked for it.
//
- if (hostdb_sync_frequency > 0) {
+ if (hostdb_sync_frequency.count() > 0) {
// If proxy.config.hostdb.storage_path is not set, use the local state dir. If it is set to
// a relative path, make it relative to the prefix.
if (storage_path[0] == '\0') {
@@ -366,7 +504,7 @@ HostDBCache::start(int flags)
Debug("hostdb", "Opening %s, partitions=%d storage_size=%" PRIu64 " items=%d", full_path, hostdb_partitions, hostdb_max_size,
hostdb_max_count);
- int load_ret = LoadRefCountCacheFromPath(*this->refcountcache, full_path, HostDBInfo::unmarshall);
+ int load_ret = LoadRefCountCacheFromPath(*this->refcountcache, full_path, HostDBRecord::unmarshall);
if (load_ret != 0) {
Warning("Error loading cache from %s: %d", full_path, load_ret);
}
@@ -411,13 +549,12 @@ HostDBProcessor::start(int, size_t)
REC_EstablishStaticConfigInt32U(hostdb_ip_stale_interval, "proxy.config.hostdb.verify_after");
REC_EstablishStaticConfigInt32U(hostdb_ip_fail_timeout_interval, "proxy.config.hostdb.fail.timeout");
REC_EstablishStaticConfigInt32U(hostdb_serve_stale_but_revalidate, "proxy.config.hostdb.serve_stale_for");
- REC_EstablishStaticConfigInt32U(hostdb_hostfile_check_interval, "proxy.config.hostdb.host_file.interval");
REC_EstablishStaticConfigInt32U(hostdb_round_robin_max_count, "proxy.config.hostdb.round_robin_max_count");
//
// Set up hostdb_current_interval
//
- hostdb_current_interval = ink_time();
+ hostdb_current_interval = ts_clock::now();
HostDBContinuation *b = hostDBContAllocator.alloc();
SET_CONTINUATION_HANDLER(b, (HostDBContHandler)&HostDBContinuation::backgroundEvent);
@@ -430,18 +567,14 @@ HostDBProcessor::start(int, size_t)
void
HostDBContinuation::init(HostDBHash const &the_hash, Options const &opt)
{
- hash = the_hash;
- if (hash.host_name) {
+ hash = the_hash;
+ hash.host_name = hash.host_name.prefix(static_cast(sizeof(hash_host_name_store) - 1));
+ if (!hash.host_name.empty()) {
// copy to backing store.
- if (hash.host_len > static_cast(sizeof(hash_host_name_store) - 1)) {
- hash.host_len = sizeof(hash_host_name_store) - 1;
- }
- memcpy(hash_host_name_store, hash.host_name, hash.host_len);
- } else {
- hash.host_len = 0;
+ memcpy(hash_host_name_store, hash.host_name);
}
- hash_host_name_store[hash.host_len] = 0;
- hash.host_name = hash_host_name_store;
+ hash_host_name_store[hash.host_name.size()] = 0;
+ hash.host_name.assign(hash_host_name_store, hash.host_name.size());
host_res_style = opt.host_res_style;
dns_lookup_timeout = opt.timeout;
@@ -460,7 +593,7 @@ HostDBContinuation::refresh_hash()
{
Ptr old_bucket_mutex = hostDB.refcountcache->lock_for_key(hash.hash.fold());
// We're not pending DNS anymore.
- remove_trigger_pending_dns();
+ remove_and_trigger_pending_dns();
hash.refresh();
// Update the mutex if it's from the bucket.
// Some call sites modify this after calling @c init so need to check.
@@ -470,34 +603,22 @@ HostDBContinuation::refresh_hash()
}
static bool
-reply_to_cont(Continuation *cont, HostDBInfo *r, bool is_srv = false)
+reply_to_cont(Continuation *cont, HostDBRecord *r, bool is_srv = false)
{
- if (r == nullptr || r->is_srv != is_srv || r->is_failed()) {
+ if (r == nullptr || r->is_srv() != is_srv || r->is_failed()) {
cont->handleEvent(is_srv ? EVENT_SRV_LOOKUP : EVENT_HOST_DB_LOOKUP, nullptr);
return false;
}
- if (r->reverse_dns) {
- if (!r->hostname()) {
+ if (r->record_type != HostDBType::HOST) {
+ if (!r->name()) {
ink_assert(!"missing hostname");
cont->handleEvent(is_srv ? EVENT_SRV_LOOKUP : EVENT_HOST_DB_LOOKUP, nullptr);
Warning("bogus entry deleted from HostDB: missing hostname");
hostDB.refcountcache->erase(r->key);
return false;
}
- Debug("hostdb", "hostname = %s", r->hostname());
- }
-
- if (!r->is_srv && r->round_robin) {
- if (!r->rr()) {
- ink_assert(!"missing round-robin");
- cont->handleEvent(is_srv ? EVENT_SRV_LOOKUP : EVENT_HOST_DB_LOOKUP, nullptr);
- Warning("bogus entry deleted from HostDB: missing round-robin");
- hostDB.refcountcache->erase(r->key);
- return false;
- }
- ip_text_buffer ipb;
- Debug("hostdb", "RR of %d with %d good, 1st IP = %s", r->rr()->rrcount, r->rr()->good, ats_ip_ntop(r->ip(), ipb, sizeof ipb));
+ Debug("hostdb", "hostname = %s", r->name());
}
cont->handleEvent(is_srv ? EVENT_SRV_LOOKUP : EVENT_HOST_DB_LOOKUP, r);
@@ -541,73 +662,57 @@ db_mark_for(IpAddr const &ip)
return ip.isIp6() ? HOSTDB_MARK_IPV6 : HOSTDB_MARK_IPV4;
}
-Ptr
+HostDBRecord::Handle
probe(const Ptr &mutex, HostDBHash const &hash, bool ignore_timeout)
{
+ static const Ptr NO_RECORD;
+
// If hostdb is disabled, don't return anything
if (!hostdb_enable) {
- return Ptr();
+ return NO_RECORD;
}
// Otherwise HostDB is enabled, so we'll do our thing
ink_assert(this_ethread() == hostDB.refcountcache->lock_for_key(hash.hash.fold())->thread_holding);
uint64_t folded_hash = hash.hash.fold();
- // get the item from cache
- Ptr r = hostDB.refcountcache->get(folded_hash);
+ // get the record from cache
+ Ptr record = hostDB.refcountcache->get(folded_hash);
// If there was nothing in the cache-- this is a miss
- if (r.get() == nullptr) {
- return r;
+ if (record.get() == nullptr) {
+ return record;
}
// If the dns response was failed, and we've hit the failed timeout, lets stop returning it
- if (r->is_failed() && r->is_ip_fail_timeout()) {
- return make_ptr((HostDBInfo *)nullptr);
- // if we aren't ignoring timeouts, and we are past it-- then remove the item
- } else if (!ignore_timeout && r->is_ip_timeout() && !r->serve_stale_but_revalidate()) {
+ if (record->is_failed() && record->is_ip_fail_timeout()) {
+ return NO_RECORD;
+ // if we aren't ignoring timeouts, and we are past it-- then return no record (the entry itself is left in the cache)
+ } else if (!ignore_timeout && record->is_ip_timeout() && !record->serve_stale_but_revalidate()) {
HOSTDB_INCREMENT_DYN_STAT(hostdb_ttl_expires_stat);
- return make_ptr((HostDBInfo *)nullptr);
+ return NO_RECORD;
}
// If the record is stale, but we want to revalidate-- lets start that up
- if ((!ignore_timeout && r->is_ip_stale() && !r->reverse_dns) || (r->is_ip_timeout() && r->serve_stale_but_revalidate())) {
+ if ((!ignore_timeout && record->is_ip_stale() && record->record_type != HostDBType::HOST) ||
+ (record->is_ip_timeout() && record->serve_stale_but_revalidate())) {
if (hostDB.is_pending_dns_for_hash(hash.hash)) {
- Debug("hostdb", "stale %u %u %u, using it and pending to refresh it", r->ip_interval(), r->ip_timestamp,
- r->ip_timeout_interval);
- return r;
- }
- Debug("hostdb", "stale %u %u %u, using it and refreshing it", r->ip_interval(), r->ip_timestamp, r->ip_timeout_interval);
+ Debug("hostdb", "%s",
+ ts::bwprint(ts::bw_dbg, "stale {} {} {}, using with pending refresh", record->ip_interval(),
+ record->ip_timestamp.time_since_epoch(), record->ip_timeout_interval)
+ .c_str());
+ return record;
+ }
+ Debug("hostdb", "%s",
+ ts::bwprint(ts::bw_dbg, "stale {} {} {}, using while refresh", record->ip_interval(),
+ record->ip_timestamp.time_since_epoch(), record->ip_timeout_interval)
+ .c_str());
HostDBContinuation *c = hostDBContAllocator.alloc();
HostDBContinuation::Options copt;
- copt.host_res_style = host_res_style_for(r->ip());
+ copt.host_res_style = record->af_family == AF_INET6 ? HOST_RES_IPV6_ONLY : HOST_RES_IPV4_ONLY;
c->init(hash, copt);
c->do_dns();
}
- return r;
-}
-
-//
-// Insert a HostDBInfo into the database
-// A null value indicates that the block is empty.
-//
-HostDBInfo *
-HostDBContinuation::insert(unsigned int attl)
-{
- uint64_t folded_hash = hash.hash.fold();
-
- ink_assert(this_ethread() == hostDB.refcountcache->lock_for_key(folded_hash)->thread_holding);
-
- HostDBInfo *r = HostDBInfo::alloc();
- r->key = folded_hash;
-
- r->ip_timestamp = hostdb_current_interval;
- r->ip_timeout_interval = std::clamp(attl, 1u, HOST_DB_MAX_TTL);
-
- Debug("hostdb", "inserting for: %.*s: (hash: %" PRIx64 ") now: %u timeout: %u ttl: %u", hash.host_len, hash.host_name,
- folded_hash, r->ip_timestamp, r->ip_timeout_interval, attl);
-
- hostDB.refcountcache->put(folded_hash, r, 0, r->expiry_time());
- return r;
+ return record;
}
//
@@ -658,7 +763,7 @@ HostDBProcessor::getby(Continuation *cont, cb_process_result_pfn cb_process_resu
MUTEX_TRY_LOCK(lock2, bucket_mutex, thread);
if (lock2.is_locked()) {
// If we can get the lock and a level 1 probe succeeds, return
- Ptr r = probe(bucket_mutex, hash, false);
+ HostDBRecord::Handle r = probe(bucket_mutex, hash, false);
if (r) {
// fail, see if we should retry with alternate
if (hash.db_mark != HOSTDB_MARK_SRV && r->is_failed() && hash.host_name) {
@@ -667,10 +772,10 @@ HostDBProcessor::getby(Continuation *cont, cb_process_result_pfn cb_process_resu
if (!loop) {
// No retry -> final result. Return it.
if (hash.db_mark == HOSTDB_MARK_SRV) {
- Debug("hostdb", "immediate SRV answer for %.*s from hostdb", hash.host_len, hash.host_name);
- Debug("dns_srv", "immediate SRV answer for %.*s from hostdb", hash.host_len, hash.host_name);
+ Debug("hostdb", "immediate SRV answer for %.*s from hostdb", int(hash.host_name.size()), hash.host_name.data());
+ Debug("dns_srv", "immediate SRV answer for %.*s from hostdb", int(hash.host_name.size()), hash.host_name.data());
} else if (hash.host_name) {
- Debug("hostdb", "immediate answer for %.*s", hash.host_len, hash.host_name);
+ Debug("hostdb", "immediate answer for %.*s", int(hash.host_name.size()), hash.host_name.data());
} else {
Debug("hostdb", "immediate answer for %s", hash.ip.isValid() ? hash.ip.toString(ipb, sizeof ipb) : "");
}
@@ -688,12 +793,13 @@ HostDBProcessor::getby(Continuation *cont, cb_process_result_pfn cb_process_resu
}
}
if (hash.db_mark == HOSTDB_MARK_SRV) {
- Debug("hostdb", "delaying (force=%d) SRV answer for %.*s [timeout = %d]", force_dns, hash.host_len, hash.host_name,
- opt.timeout);
- Debug("dns_srv", "delaying (force=%d) SRV answer for %.*s [timeout = %d]", force_dns, hash.host_len, hash.host_name,
- opt.timeout);
+ Debug("hostdb", "delaying (force=%d) SRV answer for %.*s [timeout = %d]", force_dns, int(hash.host_name.size()),
+ hash.host_name.data(), opt.timeout);
+ Debug("dns_srv", "delaying (force=%d) SRV answer for %.*s [timeout = %d]", force_dns, int(hash.host_name.size()),
+ hash.host_name.data(), opt.timeout);
} else if (hash.host_name) {
- Debug("hostdb", "delaying (force=%d) answer for %.*s [timeout %d]", force_dns, hash.host_len, hash.host_name, opt.timeout);
+ Debug("hostdb", "delaying (force=%d) answer for %.*s [timeout %d]", force_dns, int(hash.host_name.size()),
+ hash.host_name.data(), opt.timeout);
} else {
Debug("hostdb", "delaying (force=%d) answer for %s [timeout %d]", force_dns,
hash.ip.isValid() ? hash.ip.toString(ipb, sizeof ipb) : "", opt.timeout);
@@ -726,7 +832,7 @@ HostDBProcessor::getbyname_re(Continuation *cont, const char *ahostname, int len
ink_assert(nullptr != ahostname);
// Load the hash data.
- hash.set_host(ahostname, ahostname ? (len ? len : strlen(ahostname)) : 0);
+ hash.set_host({ahostname, ahostname ? (len ? len : strlen(ahostname)) : 0});
// Leave hash.ip invalid
hash.port = 0;
hash.db_mark = db_mark_for(opt.host_res_style);
@@ -743,7 +849,7 @@ HostDBProcessor::getbynameport_re(Continuation *cont, const char *ahostname, int
ink_assert(nullptr != ahostname);
// Load the hash data.
- hash.set_host(ahostname, ahostname ? (len ? len : strlen(ahostname)) : 0);
+ hash.set_host({ahostname, ahostname ? (len ? len : strlen(ahostname)) : 0});
// Leave hash.ip invalid
hash.port = opt.port;
hash.db_mark = db_mark_for(opt.host_res_style);
@@ -782,7 +888,7 @@ HostDBProcessor::getSRVbyname_imm(Continuation *cont, cb_process_result_pfn proc
ink_assert(nullptr != hostname);
- hash.set_host(hostname, len ? len : strlen(hostname));
+ hash.set_host({hostname, len ? len : strlen(hostname)});
// Leave hash.ip invalid
hash.port = 0;
hash.db_mark = HOSTDB_MARK_SRV;
@@ -802,7 +908,7 @@ HostDBProcessor::getbyname_imm(Continuation *cont, cb_process_result_pfn process
ink_assert(nullptr != hostname);
- hash.set_host(hostname, len ? len : strlen(hostname));
+ hash.set_host({hostname, len ? len : strlen(hostname)});
// Leave hash.ip invalid
// TODO: May I rename the wrapper name to getbynameport_imm ? - oknet
// By comparing getbyname_re and getbynameport_re, the hash.port should be 0 if only get hostinfo by name.
@@ -837,150 +943,35 @@ HostDBProcessor::iterate(Continuation *cont)
return &c->action;
}
-static void
-do_setby(HostDBInfo *r, HostDBApplicationInfo *app, const char *hostname, IpAddr const &ip, bool is_srv = false)
-{
- HostDBRoundRobin *rr = r->rr();
-
- if (is_srv && (!r->is_srv || !rr)) {
- return;
- }
-
- if (rr) {
- if (is_srv) {
- uint32_t key = makeHostHash(hostname);
- for (int i = 0; i < rr->rrcount; i++) {
- if (key == rr->info(i).data.srv.key && !strcmp(hostname, rr->info(i).srvname(rr))) {
- Debug("hostdb", "immediate setby for %s", hostname);
- rr->info(i).app.allotment.application1 = app->allotment.application1;
- rr->info(i).app.allotment.application2 = app->allotment.application2;
- return;
- }
- }
- } else {
- for (int i = 0; i < rr->rrcount; i++) {
- if (rr->info(i).ip() == ip) {
- Debug("hostdb", "immediate setby for %s", hostname ? hostname : "");
- rr->info(i).app.allotment.application1 = app->allotment.application1;
- rr->info(i).app.allotment.application2 = app->allotment.application2;
- return;
- }
- }
- }
- } else {
- if (r->reverse_dns || (!r->round_robin && ip == r->ip())) {
- Debug("hostdb", "immediate setby for %s", hostname ? hostname : "");
- r->app.allotment.application1 = app->allotment.application1;
- r->app.allotment.application2 = app->allotment.application2;
- }
- }
-}
-
-void
-HostDBProcessor::setby(const char *hostname, int len, sockaddr const *ip, HostDBApplicationInfo *app)
-{
- if (!hostdb_enable) {
- return;
- }
-
- HostDBHash hash;
- hash.set_host(hostname, hostname ? (len ? len : strlen(hostname)) : 0);
- hash.ip.assign(ip);
- hash.port = ip ? ats_ip_port_host_order(ip) : 0;
- hash.db_mark = db_mark_for(ip);
- hash.refresh();
-
- // Attempt to find the result in-line, for level 1 hits
-
- Ptr mutex = hostDB.refcountcache->lock_for_key(hash.hash.fold());
- EThread *thread = this_ethread();
- MUTEX_TRY_LOCK(lock, mutex, thread);
-
- if (lock.is_locked()) {
- Ptr r = probe(mutex, hash, false);
- if (r) {
- do_setby(r.get(), app, hostname, hash.ip);
- }
- return;
- }
- // Create a continuation to do a deeper probe in the background
-
- HostDBContinuation *c = hostDBContAllocator.alloc();
- c->init(hash);
- c->app.allotment.application1 = app->allotment.application1;
- c->app.allotment.application2 = app->allotment.application2;
- SET_CONTINUATION_HANDLER(c, (HostDBContHandler)&HostDBContinuation::setbyEvent);
- thread->schedule_in(c, MUTEX_RETRY_DELAY);
-}
-
-void
-HostDBProcessor::setby_srv(const char *hostname, int len, const char *target, HostDBApplicationInfo *app)
-{
- if (!hostdb_enable || !hostname || !target) {
- return;
- }
-
- HostDBHash hash;
- hash.set_host(hostname, len ? len : strlen(hostname));
- hash.port = 0;
- hash.db_mark = HOSTDB_MARK_SRV;
- hash.refresh();
-
- // Create a continuation to do a deeper probe in the background
-
- HostDBContinuation *c = hostDBContAllocator.alloc();
- c->init(hash);
- ink_strlcpy(c->srv_target_name, target, MAXDNAME);
- c->app.allotment.application1 = app->allotment.application1;
- c->app.allotment.application2 = app->allotment.application2;
- SET_CONTINUATION_HANDLER(c, (HostDBContHandler)&HostDBContinuation::setbyEvent);
- eventProcessor.schedule_imm(c);
-}
-int
-HostDBContinuation::setbyEvent(int /* event ATS_UNUSED */, Event * /* e ATS_UNUSED */)
-{
- Ptr r = probe(mutex, hash, false);
-
- if (r) {
- do_setby(r.get(), &app, hash.host_name, hash.ip, is_srv());
- }
-
- hostdb_cont_free(this);
- return EVENT_DONE;
-}
-
// Lookup done, insert into the local table, return data to the
// calling continuation.
// NOTE: if "i" exists it means we already allocated the space etc, just return
//
-HostDBInfo *
-HostDBContinuation::lookup_done(IpAddr const &ip, const char *aname, bool around_robin, unsigned int ttl_seconds, SRVHosts *srv,
- HostDBInfo *r)
+Ptr
+HostDBContinuation::lookup_done(TextView query_name, ts_seconds answer_ttl, SRVHosts *srv, Ptr record)
{
ink_assert(this_ethread() == hostDB.refcountcache->lock_for_key(hash.hash.fold())->thread_holding);
- if (!ip.isValid() || !aname || !aname[0]) {
+ ink_assert(record);
+ if (query_name.empty()) {
if (is_byname()) {
- Debug("hostdb", "lookup_done() failed for '%.*s'", hash.host_len, hash.host_name);
+ Debug("hostdb", "lookup_done() failed for '%.*s'", int(hash.host_name.size()), hash.host_name.data());
} else if (is_srv()) {
- Debug("dns_srv", "SRV failed for '%.*s'", hash.host_len, hash.host_name);
+ Debug("dns_srv", "SRV failed for '%.*s'", int(hash.host_name.size()), hash.host_name.data());
} else {
ip_text_buffer b;
Debug("hostdb", "failed for %s", hash.ip.toString(b, sizeof b));
}
- if (r == nullptr) {
- r = insert(hostdb_ip_fail_timeout_interval);
- } else {
- r->ip_timestamp = hostdb_current_interval;
- r->ip_timeout_interval = std::clamp(hostdb_ip_fail_timeout_interval, 1u, HOST_DB_MAX_TTL);
- }
+ record->ip_timestamp = hostdb_current_interval;
+ record->ip_timeout_interval = ts_seconds(std::clamp(hostdb_ip_fail_timeout_interval, 1u, HOST_DB_MAX_TTL));
- r->round_robin = false;
- r->round_robin_elt = false;
- r->is_srv = is_srv();
- r->reverse_dns = !is_byname() && !is_srv();
+ if (is_srv()) {
+ record->record_type = HostDBType::SRV;
+ } else if (!is_byname()) {
+ record->record_type = HostDBType::HOST;
+ }
- r->set_failed();
- return r;
+ record->set_failed();
+ return record;
} else {
switch (hostdb_ttl_mode) {
@@ -989,65 +980,38 @@ HostDBContinuation::lookup_done(IpAddr const &ip, const char *aname, bool around
case TTL_OBEY:
break;
case TTL_IGNORE:
- ttl_seconds = hostdb_ip_timeout_interval;
+ answer_ttl = ts_seconds(hostdb_ip_timeout_interval);
break;
case TTL_MIN:
- if (hostdb_ip_timeout_interval < ttl_seconds) {
- ttl_seconds = hostdb_ip_timeout_interval;
+ if (ts_seconds(hostdb_ip_timeout_interval) < answer_ttl) {
+ answer_ttl = ts_seconds(hostdb_ip_timeout_interval);
}
break;
case TTL_MAX:
- if (hostdb_ip_timeout_interval > ttl_seconds) {
- ttl_seconds = hostdb_ip_timeout_interval;
+ if (ts_seconds(hostdb_ip_timeout_interval) > answer_ttl) {
+ answer_ttl = ts_seconds(hostdb_ip_timeout_interval);
}
break;
}
- HOSTDB_SUM_DYN_STAT(hostdb_ttl_stat, ttl_seconds);
+ HOSTDB_SUM_DYN_STAT(hostdb_ttl_stat, answer_ttl.count());
- if (r == nullptr) {
- r = insert(ttl_seconds);
- } else {
- // update the TTL
- r->ip_timestamp = hostdb_current_interval;
- r->ip_timeout_interval = std::clamp(ttl_seconds, 1u, HOST_DB_MAX_TTL);
- }
+ // update the TTL
+ record->ip_timestamp = hostdb_current_interval;
+ record->ip_timeout_interval = std::clamp(answer_ttl, ts_seconds(1), ts_seconds(HOST_DB_MAX_TTL));
- r->round_robin_elt = false; // only true for elements explicitly added as RR elements.
if (is_byname()) {
- ip_text_buffer b;
- Debug("hostdb", "done %s TTL %d", ip.toString(b, sizeof b), ttl_seconds);
- ats_ip_set(r->ip(), ip);
- r->round_robin = around_robin;
- r->reverse_dns = false;
- if (hash.host_name != aname) {
- ink_strlcpy(hash_host_name_store, aname, sizeof(hash_host_name_store));
- }
- r->is_srv = false;
+ Debug_bw("hostdb", "done {} TTL {}", hash.host_name, answer_ttl);
} else if (is_srv()) {
- ink_assert(srv && srv->hosts.size() && srv->hosts.size() <= hostdb_round_robin_max_count && around_robin);
-
- r->data.srv.srv_offset = srv->hosts.size();
- r->reverse_dns = false;
- r->is_srv = true;
- r->round_robin = around_robin;
-
- if (hash.host_name != aname) {
- ink_strlcpy(hash_host_name_store, aname, sizeof(hash_host_name_store));
- }
+ ink_assert(srv && srv->hosts.size() && srv->hosts.size() <= hostdb_round_robin_max_count);
+ record->record_type = HostDBType::SRV;
} else {
- Debug("hostdb", "done '%s' TTL %d", aname, ttl_seconds);
- // TODO: check that this is right, it seems that the 2 hostnames are always the same
- r->data.hostname_offset = r->hostname_offset;
- // TODO: consolidate into a single "item type" field?
- r->round_robin = false;
- r->reverse_dns = true;
- r->is_srv = false;
+ Debug_bw("hostdb", "done {} TTL {}", hash.host_name, answer_ttl);
+ record->record_type = HostDBType::HOST;
}
}
- ink_assert(!r->round_robin || !r->reverse_dns);
- return r;
+ return record;
}
int
@@ -1077,28 +1041,7 @@ HostDBContinuation::dnsPendingEvent(int event, Event *e)
}
}
-// for a new HostDBInfo `r`, "inherit" from the old version of yourself if it exists in `old_rr_data`
-static int
-restore_info(HostDBInfo *r, HostDBInfo *old_r, HostDBInfo &old_info, HostDBRoundRobin *old_rr_data)
-{
- if (old_rr_data) {
- for (int j = 0; j < old_rr_data->rrcount; j++) {
- if (ats_ip_addr_eq(old_rr_data->info(j).ip(), r->ip())) {
- r->app = old_rr_data->info(j).app;
- return true;
- }
- }
- } else if (old_r) {
- if (ats_ip_addr_eq(old_info.ip(), r->ip())) {
- r->app = old_info.app;
- return true;
- }
- }
- return false;
-}
-
// DNS lookup result state
-//
int
HostDBContinuation::dnsEvent(int event, HostEnt *e)
{
@@ -1117,7 +1060,7 @@ HostDBContinuation::dnsEvent(int event, HostEnt *e)
// actual DNS query. If the request rate is high enough this can cause a persistent queue where the
// DNS query is never sent and all requests timeout, even if it was a transient error.
// See issue #8417.
- remove_trigger_pending_dns();
+ remove_and_trigger_pending_dns();
} else {
// "local" signal to give up, usually due this being one of those "other" queries.
// That generally means @a this has already been removed from the queue, but just in case...
@@ -1143,38 +1086,25 @@ HostDBContinuation::dnsEvent(int event, HostEnt *e)
} else {
bool failed = !e || !e->good;
- bool is_rr = false;
pending_action = nullptr;
- if (is_srv()) {
- is_rr = !failed && (e->srv_hosts.hosts.size() > 0);
- } else if (!failed) {
- is_rr = nullptr != e->ent.h_addr_list[1];
- } else {
- }
-
- ttl = failed ? 0 : e->ttl / 60;
- int ttl_seconds = failed ? 0 : e->ttl; // ebalsa: moving to second accuracy
+ ttl = ts_seconds(failed ? 0 : e->ttl);
- Ptr old_r = probe(mutex, hash, false);
+ Ptr old_r = probe(mutex, hash, false);
// If the DNS lookup failed with NXDOMAIN, remove the old record
if (e && e->isNameError() && old_r) {
hostDB.refcountcache->erase(old_r->key);
old_r = nullptr;
Debug("hostdb", "Removing the old record when the DNS lookup failed with NXDOMAIN");
}
- HostDBInfo old_info;
- if (old_r) {
- old_info = *old_r.get();
- }
- HostDBRoundRobin *old_rr_data = old_r ? old_r->rr() : nullptr;
- int valid_records = 0;
- void *first_record = nullptr;
- uint8_t af = e ? e->ent.h_addrtype : AF_UNSPEC; // address family
- // if this is an RR response, we need to find the first record, as well as the
- // total number of records
- if (is_rr) {
- if (is_srv() && !failed) {
+
+ int valid_records = 0;
+ void *first_record = nullptr;
+ sa_family_t af = e ? e->ent.h_addrtype : AF_UNSPEC; // address family
+
+ // Find the first record and total number of records.
+ if (!failed) {
+ if (is_srv()) {
valid_records = e->srv_hosts.hosts.size();
} else {
void *ptr; // tmp for current entry.
@@ -1194,160 +1124,92 @@ HostDBContinuation::dnsEvent(int event, HostEnt *e)
++valid_records;
} else {
- Warning("Zero address removed from round-robin list for '%s'", hash.host_name);
+ Warning("Invalid address removed for '%.*s'", int(hash.host_name.size()), hash.host_name.data());
}
}
if (!first_record) {
failed = true;
- is_rr = false;
}
}
- } else if (!failed) {
- first_record = e->ent.h_addr_list[0];
- } // else first is 0.
-
- IpAddr tip; // temp storage if needed.
+ } // else first is nullptr
// In the event that the lookup failed (SOA response-- for example) we want to use hash.host_name, since it'll be ""
- const char *aname = (failed || strlen(hash.host_name)) ? hash.host_name : e->ent.h_name;
-
- const size_t s_size = strlen(aname) + 1;
- const size_t rrsize = is_rr ? HostDBRoundRobin::size(valid_records, e->srv_hosts.srv_hosts_length) : 0;
- // where in our block of memory we are
- int offset = sizeof(HostDBInfo);
-
- int allocSize = s_size + rrsize; // The extra space we need for the rest of the things
-
- HostDBInfo *r = HostDBInfo::alloc(allocSize);
- Debug("hostdb", "allocating %d bytes for %s with %d RR records at [%p]", allocSize, aname, valid_records, r);
- // set up the record
- r->key = hash.hash.fold(); // always set the key
-
- r->hostname_offset = offset;
- ink_strlcpy(r->perm_hostname(), aname, s_size);
- offset += s_size;
+ TextView query_name = (failed || !hash.host_name.empty()) ? hash.host_name : TextView{e->ent.h_name, strlen(e->ent.h_name)};
+ HostDBRecord::Handle r{HostDBRecord::alloc(query_name, valid_records, failed ? 0 : e->srv_hosts.srv_hosts_length)};
+ r->key = hash.hash.fold(); // always set the key
+ r->af_family = af;
+ r->flags.f.failed_p = failed;
// If the DNS lookup failed (errors such as SERVFAIL, etc.) but we have an old record
// which is okay with being served stale-- lets continue to serve the stale record as long as
// the record is willing to be served.
bool serve_stale = false;
if (failed && old_r && old_r->serve_stale_but_revalidate()) {
- r->free();
- r = old_r.get();
+ r = old_r;
serve_stale = true;
} else if (is_byname()) {
- if (first_record) {
- ip_addr_set(tip, af, first_record);
- }
- r = lookup_done(tip, hash.host_name, is_rr, ttl_seconds, failed ? nullptr : &e->srv_hosts, r);
+ lookup_done(hash.host_name, ttl, failed ? nullptr : &e->srv_hosts, r);
} else if (is_srv()) {
- if (!failed) {
- tip._family = AF_INET; // force the tip valid, or else the srv will fail
- }
- r = lookup_done(tip, /* junk: FIXME: is the code in lookup_done() wrong to NEED this? */
- hash.host_name, /* hostname */
- is_rr, /* is round robin, doesnt matter for SRV since we recheck getCount() inside lookup_done() */
- ttl_seconds, /* ttl in seconds */
- failed ? nullptr : &e->srv_hosts, r);
+ lookup_done(hash.host_name, /* hostname */
+ ttl, /* ttl in seconds */
+ failed ? nullptr : &e->srv_hosts, r);
} else if (failed) {
- r = lookup_done(tip, hash.host_name, false, ttl_seconds, nullptr, r);
+ lookup_done(hash.host_name, ttl, nullptr, r);
} else {
- r = lookup_done(hash.ip, e->ent.h_name, false, ttl_seconds, &e->srv_hosts, r);
+ lookup_done(e->ent.h_name, ttl, &e->srv_hosts, r);
}
- // Conditionally make rr record entries
- if (is_rr) {
- r->app.rr.offset = offset;
- // This will only be set if is_rr
- HostDBRoundRobin *rr_data = static_cast(r->rr());
- ;
+ if (!failed) { // implies r != old_r
+ auto rr_info = r->rr_info();
+ // Fill in record type specific data.
if (is_srv()) {
- int skip = 0;
- char *pos = reinterpret_cast(rr_data) + sizeof(HostDBRoundRobin) + valid_records * sizeof(HostDBInfo);
+ char *pos = rr_info.rebind().end();
SRV *q[valid_records];
ink_assert(valid_records <= (int)hostdb_round_robin_max_count);
- // sort
for (int i = 0; i < valid_records; ++i) {
q[i] = &e->srv_hosts.hosts[i];
}
- for (int i = 0; i < valid_records; ++i) {
- for (int ii = i + 1; ii < valid_records; ++ii) {
- if (*q[ii] < *q[i]) {
- SRV *tmp = q[i];
- q[i] = q[ii];
- q[ii] = tmp;
- }
- }
- }
-
- rr_data->good = rr_data->rrcount = valid_records;
- rr_data->current = 0;
- for (int i = 0; i < valid_records; ++i) {
- SRV *t = q[i];
- HostDBInfo &item = rr_data->info(i);
- item.round_robin = 0;
- item.round_robin_elt = 1;
- item.reverse_dns = 0;
- item.is_srv = 1;
- item.data.srv.srv_weight = t->weight;
- item.data.srv.srv_priority = t->priority;
- item.data.srv.srv_port = t->port;
- item.data.srv.key = t->key;
-
- ink_assert((skip + t->host_len) <= e->srv_hosts.srv_hosts_length);
-
- memcpy(pos + skip, t->host, t->host_len);
- item.data.srv.srv_offset = (pos - reinterpret_cast(rr_data)) + skip;
-
- skip += t->host_len;
-
- item.app.allotment.application1 = 0;
- item.app.allotment.application2 = 0;
- Debug("dns_srv", "inserted SRV RR record [%s] into HostDB with TTL: %d seconds", t->host, ttl_seconds);
- }
-
- // restore
- if (old_rr_data) {
- for (int i = 0; i < rr_data->rrcount; ++i) {
- for (int ii = 0; ii < old_rr_data->rrcount; ++ii) {
- if (rr_data->info(i).data.srv.key == old_rr_data->info(ii).data.srv.key) {
- char *new_host = rr_data->info(i).srvname(rr_data);
- char *old_host = old_rr_data->info(ii).srvname(old_rr_data);
- if (!strcmp(new_host, old_host)) {
- rr_data->info(i).app = old_rr_data->info(ii).app;
- }
+ std::sort(q, q + valid_records, [](SRV *lhs, SRV *rhs) -> bool { return *lhs < *rhs; });
+
+ SRV **cur_srv = q;
+ for (auto &item : rr_info) {
+ auto t = *cur_srv++; // get next SRV record pointer.
+ memcpy(pos, t->host, t->host_len); // Append the name to the overall record.
+ item.assign(t, pos);
+ pos += t->host_len;
+ if (old_r) { // migrate as needed.
+ for (auto &old_item : old_r->rr_info()) {
+ if (item.data.srv.key == old_item.data.srv.key && 0 == strcmp(item.srvname(), old_item.srvname())) {
+ item.migrate_from(old_item);
+ break;
}
}
}
+ // Archetypical example of why Debug_bw is used - no printf length modifier is portable: "%zd" doesn't work on FreeBSD, "%ld" doesn't work on Ubuntu, "%lld" doesn't work on Fedora.
+ Debug_bw("dns_srv", "inserted SRV RR record [{}] into HostDB with TTL: {} seconds", t->host, ttl);
}
} else { // Otherwise this is a regular dns response
- rr_data->good = rr_data->rrcount = valid_records;
- rr_data->current = 0;
- for (int i = 0; i < valid_records; ++i) {
- HostDBInfo &item = rr_data->info(i);
- ip_addr_set(item.ip(), af, e->ent.h_addr_list[i]);
- item.round_robin = 0;
- item.round_robin_elt = 1;
- item.reverse_dns = 0;
- item.is_srv = 0;
- if (!restore_info(&item, old_r.get(), old_info, old_rr_data)) {
- item.app.allotment.application1 = 0;
- item.app.allotment.application2 = 0;
+ unsigned idx = 0;
+ for (auto &item : rr_info) {
+ item.assign(af, e->ent.h_addr_list[idx++]);
+ if (old_r) { // migrate as needed.
+ for (auto &old_item : old_r->rr_info()) {
+ if (item.data.ip == old_item.data.ip) {
+ item.migrate_from(old_item);
+ break;
+ }
+ }
}
}
}
}
- if (!failed && !is_rr && !is_srv()) {
- restore_info(r, old_r.get(), old_info, old_rr_data);
- }
- ink_assert(!r || !r->round_robin || !r->reverse_dns);
- ink_assert(failed || !r->round_robin || r->app.rr.offset);
-
- if (!serve_stale) {
- hostDB.refcountcache->put(hash.hash.fold(), r, allocSize, r->expiry_time());
+ if (!serve_stale) { // implies r != old_r
+ hostDB.refcountcache->put(
+ r->key, r.get(), r->_record_size,
+ (r->ip_timestamp + r->ip_timeout_interval + ts_seconds(hostdb_serve_stale_but_revalidate)).time_since_epoch().count());
} else {
- Warning("Fallback to serving stale record, skip re-update of hostdb for %s", aname);
+ Warning("Fallback to serving stale record, skip re-update of hostdb for %.*s", int(query_name.size()), query_name.data());
}
// try to callback the user
@@ -1372,7 +1234,7 @@ HostDBContinuation::dnsEvent(int event, HostEnt *e)
if (action.continuation->mutex) {
ink_release_assert(action.continuation->mutex == action.mutex);
}
- reply_to_cont(action.continuation, r, is_srv());
+ reply_to_cont(action.continuation, r.get(), is_srv());
}
need_to_reschedule = false;
}
@@ -1389,7 +1251,7 @@ HostDBContinuation::dnsEvent(int event, HostEnt *e)
hostDB.pending_dns_for_hash(hash.hash).remove(this);
// wake up everyone else who is waiting
- remove_trigger_pending_dns();
+ remove_and_trigger_pending_dns();
hostdb_cont_free(this);
@@ -1432,7 +1294,7 @@ HostDBContinuation::iterateEvent(int event, Event *e)
IntrusiveHashMap &partMap = hostDB.refcountcache->get_partition(current_iterate_pos).get_map();
for (const auto &it : partMap) {
- HostDBInfo *r = static_cast(it.item.get());
+ auto *r = static_cast(it.item.get());
if (r && !r->is_failed()) {
action.continuation->handleEvent(EVENT_INTERVAL, static_cast(r));
}
@@ -1498,7 +1360,7 @@ HostDBContinuation::probeEvent(int /* event ATS_UNUSED */, Event *e)
if (!force_dns) {
// Do the probe
//
- Ptr r = probe(mutex, hash, false);
+ Ptr r = probe(mutex, hash, false);
if (r) {
HOSTDB_INCREMENT_DYN_STAT(hostdb_total_hits_stat);
@@ -1544,7 +1406,7 @@ HostDBContinuation::set_check_pending_dns()
}
void
-HostDBContinuation::remove_trigger_pending_dns()
+HostDBContinuation::remove_and_trigger_pending_dns()
{
Queue &q = hostDB.pending_dns_for_hash(hash.hash);
q.remove(this);
@@ -1582,31 +1444,42 @@ HostDBContinuation::do_dns()
{
ink_assert(!action.cancelled);
if (is_byname()) {
- Debug("hostdb", "DNS %s", hash.host_name);
+ Debug("hostdb", "DNS %.*s", int(hash.host_name.size()), hash.host_name.data());
IpAddr tip;
if (0 == tip.load(hash.host_name)) {
- // check 127.0.0.1 format // What the heck does that mean? - AMC
+ // Need to consider if this is necessary - could the record in ResolveInfo be left null and
+ // just the resolved address set?
if (action.continuation) {
- HostDBInfo *r = lookup_done(tip, hash.host_name, false, HOST_DB_MAX_TTL, nullptr);
-
- reply_to_cont(action.continuation, r);
+ HostDBRecord::Handle r{HostDBRecord::alloc(hash.host_name, 1)};
+ r->af_family = tip.family();
+ auto &info = r->rr_info()[0];
+ info.assign(tip);
+ // tricksy - @a reply_to_cont must use an intrusive pointer to @a r if it needs to persist
+ // @a r doesn't go out of scope until after this returns. This continuation shares the mutex
+ // of the target continuation therefore this is always dispatched synchronously.
+ reply_to_cont(action.continuation, r.get());
}
hostdb_cont_free(this);
return;
}
- ts::ConstBuffer hname(hash.host_name, hash.host_len);
- Ptr current_host_file_map = hostDB.hosts_file_ptr;
- HostsFileMap::iterator find_result = current_host_file_map->hosts_file_map.find(hname);
- if (find_result != current_host_file_map->hosts_file_map.end()) {
- if (action.continuation) {
- // Set the TTL based on how often we stat() the host file
- HostDBInfo *r = lookup_done(IpAddr(find_result->second), hash.host_name, false, hostdb_hostfile_check_interval, nullptr);
- reply_to_cont(action.continuation, r);
+
+ // If looking for an IPv4 or IPv6 address, check the host file.
+ if (hash.db_mark == HOSTDB_MARK_IPV6 || hash.db_mark == HOSTDB_MARK_IPV4) {
+ if (auto static_hosts = hostDB.acquire_host_file(); static_hosts) {
+ if (auto spot = static_hosts->find(hash.host_name); spot != static_hosts->end()) {
+ HostDBRecord::Handle r = (hash.db_mark == HOSTDB_MARK_IPV4) ? spot->second.record_4 : spot->second.record_6;
+ // Set the TTL based on how often we stat() the host file
+ if (r && action.continuation) {
+ r = lookup_done(hash.host_name, hostdb_hostfile_check_interval, nullptr, r);
+ reply_to_cont(action.continuation, r.get());
+ hostdb_cont_free(this);
+ return;
+ }
+ }
}
- hostdb_cont_free(this);
- return;
}
}
+
if (hostdb_lookup_timeout) {
timeout = mutex->thread_holding->schedule_in(this, HRTIME_SECONDS(hostdb_lookup_timeout));
} else {
@@ -1623,7 +1496,7 @@ HostDBContinuation::do_dns()
}
pending_action = dnsProcessor.gethostbyname(this, hash.host_name, opt);
} else if (is_srv()) {
- Debug("dns_srv", "SRV lookup of %s", hash.host_name);
+ Debug("dns_srv", "SRV lookup of %.*s", int(hash.host_name.size()), hash.host_name.data());
pending_action = dnsProcessor.getSRVbyname(this, hash.host_name, opt);
} else {
ip_text_buffer ipb;
@@ -1643,44 +1516,41 @@ HostDBContinuation::do_dns()
int
HostDBContinuation::backgroundEvent(int /* event ATS_UNUSED */, Event * /* e ATS_UNUSED */)
{
+ std::string dbg;
+
// No nothing if hosts file checking is not enabled.
- if (hostdb_hostfile_check_interval == 0) {
+ if (hostdb_hostfile_check_interval.count() == 0) {
return EVENT_CONT;
}
- hostdb_current_interval = ink_time();
+ hostdb_current_interval = ts_clock::now();
if ((hostdb_current_interval - hostdb_last_interval) > hostdb_hostfile_check_interval) {
bool update_p = false; // do we need to reparse the file and update?
- struct stat info;
- char path[sizeof(hostdb_hostfile_path)];
+ char path[PATH_NAME_MAX];
REC_ReadConfigString(path, "proxy.config.hostdb.host_file.path", sizeof(path));
- if (0 != strcasecmp(hostdb_hostfile_path, path)) {
- Debug("hostdb", "Update host file '%s' -> '%s'", (*hostdb_hostfile_path ? hostdb_hostfile_path : "*-none-*"),
- (*path ? path : "*-none-*"));
+ if (0 != strcasecmp(hostdb_hostfile_path.string(), path)) {
+ Debug("hostdb", "%s",
+ ts::bwprint(dbg, R"(Updating hosts file from "{}" to "{}")", hostdb_hostfile_path, ts::bwf::FirstOf(path, "")).c_str());
// path to hostfile changed
- hostdb_hostfile_update_timestamp = 0; // never updated from this file
- if ('\0' != *path) {
- memcpy(hostdb_hostfile_path, path, sizeof(hostdb_hostfile_path));
- } else {
- hostdb_hostfile_path[0] = 0; // mark as not there
- }
- update_p = true;
- } else {
+ hostdb_hostfile_update_timestamp = TS_TIME_ZERO; // never updated from this file
+ hostdb_hostfile_path = path;
+ update_p = true;
+ } else if (!hostdb_hostfile_path.empty()) {
hostdb_last_interval = hostdb_current_interval;
- if (*hostdb_hostfile_path) {
- if (0 == stat(hostdb_hostfile_path, &info)) {
- if (info.st_mtime > static_cast(hostdb_hostfile_update_timestamp)) {
- update_p = true; // same file but it's changed.
- }
- } else {
- Debug("hostdb", "Failed to stat host file '%s'", hostdb_hostfile_path);
+ std::error_code ec;
+ auto stat{ts::file::status(hostdb_hostfile_path, ec)};
+ if (!ec) {
+ if (ts_clock::from_time_t(modification_time(stat)) > hostdb_hostfile_update_timestamp) {
+ update_p = true; // same file but it's changed.
}
+ } else {
+ Debug("hostdb", "%s", ts::bwprint(dbg, R"(Failed to stat host file "{}" - {})", hostdb_hostfile_path, ec).c_str());
}
}
if (update_p) {
- Debug("hostdb", "Updating from host file");
+ Debug("hostdb", "%s", ts::bwprint(dbg, R"(Updating from host file "{}")", hostdb_hostfile_path).c_str());
ParseHostFile(hostdb_hostfile_path, hostdb_hostfile_check_interval);
}
}
@@ -1688,37 +1558,60 @@ HostDBContinuation::backgroundEvent(int /* event ATS_UNUSED */, Event * /* e ATS
return EVENT_CONT;
}
-char *
-HostDBInfo::hostname() const
-{
- if (!reverse_dns) {
- return nullptr;
- }
-
- return (char *)this + data.hostname_offset;
-}
-
-/*
- * The perm_hostname exists for all records not just reverse dns records.
- */
-char *
-HostDBInfo::perm_hostname() const
+/** Select the target to use for an HTTP connection from this record.
+ *
+ * A base candidate is picked by strict round robin, timed round robin, or client address hash,
+ * depending on the @c HostDBProcessor settings. If the candidate did not come from
+ * @c select_next_rr the targets are probed through @c rr_idx(i) until one accepts @c select.
+ *
+ * @param now Current time.
+ * @param fail_window Failure window handed to the per target selection checks.
+ * @param hash_addr Address hashed against each target in the client affinity case.
+ * @return The selected target, or @c nullptr if no target could be selected.
+ */
+HostDBInfo *
+HostDBRecord::select_best_http(ts_time now, ts_seconds fail_window, sockaddr const *hash_addr)
{
- if (hostname_offset == 0) {
- return nullptr;
+ ink_assert(0 < rr_count && rr_count <= hostdb_round_robin_max_count);
+
+ // @a best_any is set to a base candidate, which may be dead.
+ HostDBInfo *best_any = nullptr;
+ // @a best_alive is set when a valid target has been selected and should be used.
+ HostDBInfo *best_alive = nullptr;
+
+ auto info{this->rr_info()};
+
+ if (HostDBProcessor::hostdb_strict_round_robin) {
+ // Always select the next viable target - select failure means no valid targets at all.
+ best_alive = best_any = this->select_next_rr(now, fail_window);
+ Debug("hostdb", "Using strict round robin - index %d", this->index_of(best_alive));
+ } else if (HostDBProcessor::hostdb_timed_round_robin > 0) {
+ auto ctime = rr_ctime.load(); // cache for atomic update.
+ auto ntime = ctime + ts_seconds(HostDBProcessor::hostdb_timed_round_robin);
+ // Check and update RR if it's time - this always yields a valid target if there is one.
+ if (now > ntime && rr_ctime.compare_exchange_strong(ctime, ntime)) {
+ best_alive = best_any = this->select_next_rr(now, fail_window);
+ Debug("hostdb", "Round robin timed interval expired - index %d", this->index_of(best_alive));
+ } else { // pick the current index, which may be dead.
+ best_any = &info[this->rr_idx()];
+ }
+ Debug("hostdb", "Using timed round robin - index %d", this->index_of(best_any));
+ } else {
+ // Walk the entries and find the best (largest) hash.
+ unsigned int best_hash = 0; // any hash is better than this.
+ for (auto &target : info) {
+ unsigned int h = HOSTDB_CLIENT_IP_HASH(hash_addr, target.data.ip);
+ if (best_hash <= h) {
+ best_any = &target;
+ best_hash = h;
+ }
+ }
+ Debug("hostdb", "Using client affinity - index %d", this->index_of(best_any));
}
- return (char *)this + hostname_offset;
-}
-
-HostDBRoundRobin *
-HostDBInfo::rr()
-{
- if (!round_robin) {
- return nullptr;
+ // If there is a base choice, search for valid target starting there.
+ // Otherwise there is no valid target in the record.
+ if (best_any && !best_alive) {
+ // Starting at the current target, search for a valid one.
+ for (unsigned short i = 0; i < rr_count; i++) {
+ auto target = &info[this->rr_idx(i)];
+ if (target->select(now, fail_window)) {
+ best_alive = target;
+ break;
+ }
+ }
}
- return reinterpret_cast(reinterpret_cast(this) + this->app.rr.offset);
+ return best_alive;
}
struct ShowHostDB;
@@ -1784,41 +1677,42 @@ struct ShowHostDB : public ShowCont {
showAllEvent(int event, Event *e)
{
if (event == EVENT_INTERVAL) {
- HostDBInfo *r = reinterpret_cast(e);
+ auto *r = reinterpret_cast(e);
if (output_json && records_seen++ > 0) {
CHECK_SHOW(show(",")); // we need to separate records
}
- showOne(r, false, event, e);
- if (r->round_robin) {
- HostDBRoundRobin *rr_data = r->rr();
- if (rr_data) {
- if (!output_json) {
- CHECK_SHOW(show("
\n"));
+
+ for (auto &item : rr_data) {
+ showOne(&item, r->record_type, event, e);
}
}
} else {
@@ -2026,9 +1896,9 @@ struct HostDBTestReverse : public Continuation {
mainEvent(int event, Event *e)
{
if (event == EVENT_HOST_DB_LOOKUP) {
- HostDBInfo *i = reinterpret_cast(e);
+ auto *i = reinterpret_cast(e);
if (i) {
- rprintf(test, "HostDBTestReverse: reversed %s\n", i->hostname());
+ rprintf(test, "HostDBTestReverse: reversed %s\n", i->name());
}
outstanding--;
}
@@ -2145,93 +2015,110 @@ HostDBFileContinuation::destroy()
// We can't allow more than one update to be
// proceeding at a time in any case so we might as well make these
// globals.
-int HostDBFileUpdateActive = 0;
+std::atomic HostDBFileUpdateActive{false};
-static void
-ParseHostLine(Ptr &map, char *l)
+/* Container for temporarily holding data from the host file. For each FQDN there is a vector of IPv4
+ * and IPv6 addresses. These are used to generate the HostDBRecord instances that are stored persistently.
+ */
+using HostAddrMap = std::unordered_map, std::vector>>;
+
+namespace
{
- Tokenizer elts(" \t");
- int n_elts = elts.Initialize(l, SHARE_TOKS);
+constexpr unsigned IPV4_IDX = 0;
+constexpr unsigned IPV6_IDX = 1;
+} // namespace
+static void
+ParseHostLine(TextView line, HostAddrMap &map)
+{
// Elements should be the address then a list of host names.
+ TextView addr_text = line.take_prefix_if(&isspace);
+ IpAddr addr;
+
// Don't use RecHttpLoadIp because the address *must* be literal.
- IpAddr ip;
- if (n_elts > 1 && 0 == ip.load(elts[0])) {
- for (int i = 1; i < n_elts; ++i) {
- ts::ConstBuffer name(elts[i], strlen(elts[i]));
- // If we don't have an entry already (host files only support single IPs for a given name)
- if (map->hosts_file_map.find(name) == map->hosts_file_map.end()) {
- map->hosts_file_map[name] = ip;
- }
+ if (TS_SUCCESS != addr.load(addr_text)) {
+ return;
+ }
+
+ while (!line.ltrim_if(&isspace).empty()) {
+ TextView name = line.take_prefix_if(&isspace);
+ if (addr.isIp6()) {
+ std::get(map[name]).push_back(addr);
+ } else if (addr.isIp4()) {
+ std::get(map[name]).push_back(addr);
}
}
}
void
-ParseHostFile(const char *path, unsigned int hostdb_hostfile_check_interval_parse)
+ParseHostFile(ts::file::path const &path, ts_seconds hostdb_hostfile_check_interval_parse)
{
- Ptr parsed_hosts_file_ptr;
+ std::shared_ptr map;
// Test and set for update in progress.
- if (0 != ink_atomic_swap(&HostDBFileUpdateActive, 1)) {
+ bool flag = false;
+ if (!HostDBFileUpdateActive.compare_exchange_strong(flag, true)) {
Debug("hostdb", "Skipped load of host file because update already in progress");
return;
}
- Debug("hostdb", "Loading host file '%s'", path);
-
- if (*path) {
- ats_scoped_fd fd(open(path, O_RDONLY));
- if (fd >= 0) {
- struct stat info;
- if (0 == fstat(fd, &info)) {
- // +1 in case no terminating newline
- int64_t size = info.st_size + 1;
-
- parsed_hosts_file_ptr = new RefCountedHostsFileMap;
- parsed_hosts_file_ptr->HostFileText = static_cast(ats_malloc(size));
- if (parsed_hosts_file_ptr->HostFileText) {
- char *base = parsed_hosts_file_ptr->HostFileText;
- char *limit;
-
- size = read(fd, parsed_hosts_file_ptr->HostFileText, info.st_size);
- limit = parsed_hosts_file_ptr->HostFileText + size;
- *limit = 0;
-
- // We need to get a list of all name/addr pairs so that we can
- // group names for round robin records. Also note that the
- // pairs have pointer back in to the text storage for the file
- // so we need to keep that until we're done with @a pairs.
- while (base < limit) {
- char *spot = strchr(base, '\n');
-
- // terminate the line.
- if (nullptr == spot) {
- spot = limit; // no trailing EOL, grab remaining
- } else {
- *spot = 0;
- }
-
- while (base < spot && isspace(*base)) {
- ++base; // skip leading ws
- }
- if (*base != '#' && base < spot) { // non-empty non-comment line
- ParseHostLine(parsed_hosts_file_ptr, base);
- }
- base = spot + 1;
- }
-
- hostdb_hostfile_update_timestamp = hostdb_current_interval;
+ Debug_bw("hostdb", R"(Loading host file "{}")", path);
+
+ if (!path.empty()) {
+ std::error_code ec;
+ std::string content = ts::file::load(path, ec);
+ if (!ec) {
+ HostAddrMap addr_map;
+ TextView text{content};
+ while (text) {
+ auto line = text.take_prefix_at('\n').ltrim_if(&isspace);
+ if (line.empty() || '#' == *line) {
+ continue;
}
+ ParseHostLine(line, addr_map);
}
+ // @a map should be loaded with all of the data, create the records.
+ map = std::make_shared();
+ // Common loading function for creating a record from the address vector.
+ auto loader = [](TextView key, std::vector const &v) -> HostDBRecord::Handle {
+ HostDBRecord::Handle record{HostDBRecord::alloc(key, v.size())};
+ record->af_family = v.front().family(); // @a v is presumed family homogenous
+ auto rr_info = record->rr_info();
+ auto spot = v.begin();
+ for (auto &item : rr_info) {
+ item.assign(*spot++);
+ }
+ return record;
+ };
+ // Walk the temporary map and create the corresponding records for the persistent map.
+ for (auto const &[key, value] : addr_map) {
+ // Bit of subtlety to be able to search records with a view and not a string - the key
+ // must point at stable memory for the name, which is available in the record itself.
+ // Therefore the lookup for adding the record must be done using a view based in the record.
+ // It doesn't matter if it's the IPv4 or IPv6 record that's used, both are stable and equal
+ // to each other.
+ // IPv4
+ if (auto const &v = std::get(value); v.size() > 0) {
+ auto r = loader(key, v);
+ (*map)[r->name_view()].record_4 = r;
+ }
+ // IPv6
+ if (auto const &v = std::get(value); v.size() > 0) {
+ auto r = loader(key, v);
+ (*map)[r->name_view()].record_6 = r;
+ }
+ }
+
+ hostdb_hostfile_update_timestamp = hostdb_current_interval;
}
}
// Swap the pointer
- if (parsed_hosts_file_ptr != nullptr) {
- hostDB.hosts_file_ptr = parsed_hosts_file_ptr;
+ if (map) {
+ std::unique_lock lock(hostDB.host_file_mutex);
+ hostDB.host_file = map;
}
// Mark this one as completed, so we can allow another update to happen
- HostDBFileUpdateActive = 0;
+ HostDBFileUpdateActive = false;
}
//
@@ -2254,7 +2141,7 @@ struct HostDBRegressionContinuation : public Continuation {
int i;
int
- mainEvent(int event, HostDBInfo *r)
+ mainEvent(int event, HostDBRecord *r)
{
(void)event;
@@ -2263,27 +2150,15 @@ struct HostDBRegressionContinuation : public Continuation {
}
if (event == EVENT_HOST_DB_LOOKUP) {
--outstanding;
- // since this is a lookup done, data is either hostdbInfo or nullptr
if (r) {
- rprintf(test, "hostdbinfo r=%x\n", r);
- char const *hname = r->perm_hostname();
- if (nullptr == hname) {
- hname = "(null)";
- }
- rprintf(test, "hostdbinfo hostname=%s\n", hname);
- rprintf(test, "hostdbinfo rr %x\n", r->rr());
+ rprintf(test, "HostDBRecord r=%x\n", r);
+ rprintf(test, "HostDBRecord hostname=%s\n", r->name());
// If RR, print all of the enclosed records
- if (r->rr()) {
- rprintf(test, "hostdbinfo good=%d\n", r->rr()->good);
- for (int x = 0; x < r->rr()->good; x++) {
- ip_port_text_buffer ip_buf;
- ats_ip_ntop(r->rr()->info(x).ip(), ip_buf, sizeof(ip_buf));
- rprintf(test, "hostdbinfo RR%d ip=%s\n", x, ip_buf);
- }
- } else { // Otherwise, just the one will do
+ auto rr_info{r->rr_info()};
+ for (int x = 0; x < r->rr_count; ++x) {
ip_port_text_buffer ip_buf;
- ats_ip_ntop(r->ip(), ip_buf, sizeof(ip_buf));
- rprintf(test, "hostdbinfo A ip=%s\n", ip_buf);
+ rr_info[x].data.ip.toString(ip_buf, sizeof(ip_buf));
+ rprintf(test, "hostdbinfo RR%d ip=%s\n", x, ip_buf);
}
++success;
} else {
@@ -2323,9 +2198,9 @@ struct HostDBRegressionContinuation : public Continuation {
static const char *dns_test_hosts[] = {
"www.apple.com", "www.ibm.com", "www.microsoft.com",
- "www.coke.com", // RR record
- "4.2.2.2", // An IP-- since we don't expect resolution
- "127.0.0.1", // loopback since it has some special handling
+ "yahoo.com", // RR record
+ "4.2.2.2", // An IP-- since we don't expect resolution
+ "127.0.0.1", // loopback since it has some special handling
};
REGRESSION_TEST(HostDBProcessor)(RegressionTest *t, int atype, int *pstatus)
@@ -2334,3 +2209,214 @@ REGRESSION_TEST(HostDBProcessor)(RegressionTest *t, int atype, int *pstatus)
}
#endif
+// -----
+// Return the memory block holding @a this to the IO buffer allocator it was taken from.
+// Nothing is released unless @c _iobuffer_index is positive.
+void
+HostDBRecord::free()
+{
+ if (_iobuffer_index > 0) {
+ // The size reported is 2^(7 + index) bytes.
+ Debug("hostdb", "freeing %d bytes at [%p]", (1 << (7 + _iobuffer_index)), this);
+ ioBufAllocator[_iobuffer_index].free_void(static_cast(this));
+ }
+}
+
+// Allocate and initialize a record with room for the query name and @a rr_count info elements.
+//
+// The block is laid out as: the record object, the query name (size rounded up, with one spare
+// byte), then the array of @c HostDBInfo at @c rr_offset. @a srv_name_size extra bytes are
+// included in the total size. The whole block is zero filled before construction.
+//
+// @param query_name Name copied into the record.
+// @param rr_count Number of @c HostDBInfo elements to construct.
+// @param srv_name_size Additional bytes to reserve in the block.
+// @return The new record. Allocation failure to find a buffer size class is a release assert.
+HostDBRecord *
+HostDBRecord::alloc(TextView query_name, unsigned int rr_count, size_t srv_name_size)
+{
+ // +1 leaves a spare byte after the name; the memset below leaves it zero.
+ const ts::Scalar<8> qn_size = ts::round_up(query_name.size() + 1);
+ const ts::Scalar<8> r_size = ts::round_up(sizeof(self_type) + qn_size + rr_count * sizeof(HostDBInfo) + srv_name_size);
+ int iobuffer_index = iobuffer_size_to_index(r_size, hostdb_max_iobuf_index);
+ ink_release_assert(iobuffer_index >= 0);
+ auto ptr = ioBufAllocator[iobuffer_index].alloc_void();
+ memset(ptr, 0, r_size);
+ auto self = static_cast(ptr);
+ new (self) self_type();
+ // Remember the allocator bucket and size so the block can be freed / serialized later.
+ self->_iobuffer_index = iobuffer_index;
+ self->_record_size = r_size;
+
+ Debug("hostdb", "allocating %ld bytes for %.*s with %d RR records at [%p]", r_size.value(), int(query_name.size()),
+ query_name.data(), rr_count, self);
+
+ // where in our block of memory we are
+ int offset = sizeof(self_type);
+ // The name is stored immediately after the record object.
+ memcpy(self->apply_offset(offset), query_name);
+ offset += qn_size;
+ // The info array follows the (rounded) name storage.
+ self->rr_offset = offset;
+ self->rr_count = rr_count;
+ // Construct the info instances to a valid state.
+ for (auto &info : self->rr_info()) {
+ new (&info) std::remove_reference_t;
+ }
+
+ return self;
+}
+
+// Rebuild a record from a serialized image.
+//
+// @param buff Serialized record bytes.
+// @param size Number of bytes in @a buff.
+// @return A newly allocated record, or @c nullptr if @a size is smaller than the record object.
+//
+// @a size must equal the @c _record_size stored in the image (release assert).
+// NOTE(review): @c _iobuffer_index is read from the image and used to index @c ioBufAllocator
+// without a range check - confirm the image is always trusted.
+HostDBRecord::self_type *
+HostDBRecord::unmarshall(char *buff, unsigned size)
+{
+ if (size < sizeof(self_type)) {
+ return nullptr;
+ }
+ auto src = reinterpret_cast(buff);
+ ink_release_assert(size == src->_record_size);
+ auto ptr = ioBufAllocator[src->_iobuffer_index].alloc_void();
+ auto self = static_cast(ptr);
+ // Construct first so the base class part is valid, then overlay everything after it.
+ new (self) self_type();
+ auto delta = sizeof(RefCountObj); // skip the VFTP and ref count.
+ memcpy(static_cast(ptr) + delta, buff + delta, size - delta);
+ return self;
+}
+
+/// Decide whether an expired record may still be served while it is being revalidated.
+/// @return @c true if the feature is configured and the record age is still inside the
+/// TTL plus the configured number of seconds, @c false otherwise.
+bool
+HostDBRecord::serve_stale_but_revalidate() const
+{
+  // A non-positive setting means the option is disabled.
+  if (hostdb_serve_stale_but_revalidate > 0) {
+    // ip_timeout_interval is the DNS TTL, the configuration value is a count of seconds and
+    // ip_interval() is the time since the entry was inserted.
+    auto const stale_limit = ip_timeout_interval + ts_seconds(hostdb_serve_stale_but_revalidate);
+    if (stale_limit > ip_interval()) {
+      Debug_bw("hostdb", "serving stale entry {} | {} | {} as requested by config", ip_timeout_interval,
+               hostdb_serve_stale_but_revalidate, ip_interval());
+      return true;
+    }
+  }
+
+  // Either disabled or the entry is too old.
+  return false;
+}
+
+/** Select a target from an SRV record.
+ *
+ * Targets that are not dead are gathered while their priority does not rise above the lowest
+ * value seen so far, and one of them is picked at random, weighted by SRV weight. If nothing
+ * was gathered, or the total weight is zero, the choice falls back to @c select_next_rr.
+ *
+ * @param target [out] Buffer of at least @c MAXDNAME bytes, filled with a name on success.
+ * @param rand Random number source for the weighted choice.
+ * @param now Current time.
+ * @param fail_window Failure window used to decide whether a target is dead.
+ * @return The selected target, or @c nullptr if none could be selected.
+ */
+HostDBInfo *
+HostDBRecord::select_best_srv(char *target, InkRand *rand, ts_time now, ts_seconds fail_window)
+{
+  // Same sanity check as @c select_best_http - the count must be in the valid range.
+  ink_assert(0 < rr_count && rr_count <= hostdb_round_robin_max_count);
+
+  int i              = 0;
+  int live_n         = 0;
+  uint32_t weight    = 0, p = INT32_MAX;
+  HostDBInfo *result = nullptr;
+  auto rr            = this->rr_info();
+  // Array of live targets, the first @a live_n slots are in use.
+  HostDBInfo *live[rr.count()];
+  // The loop variable is deliberately not named "target" to avoid hiding the output parameter.
+  for (auto &candidate : rr) {
+    // skip dead upstreams - check the element being visited.
+    if (candidate.is_dead(now, fail_window)) {
+      continue;
+    }
+
+    if (candidate.data.srv.srv_priority <= p) {
+      p = candidate.data.srv.srv_priority;
+      weight += candidate.data.srv.srv_weight;
+      live[live_n++] = &candidate;
+    } else {
+      break;
+    }
+  }
+
+  if (live_n == 0 || weight == 0) { // no valid or weighted choice, use strict RR
+    result = this->select_next_rr(now, fail_window);
+  } else {
+    // Walk the live targets subtracting weights until the random value falls in a slot.
+    uint32_t xx = rand->random() % weight;
+    for (i = 0; i < live_n - 1 && xx >= live[i]->data.srv.srv_weight; ++i) {
+      xx -= live[i]->data.srv.srv_weight;
+    }
+
+    result = live[i];
+  }
+
+  if (result) {
+    // NOTE(review): this copies the record name, not a name specific to @a result - confirm intended.
+    ink_strlcpy(target, this->name(), MAXDNAME);
+    return result;
+  }
+  return nullptr;
+}
+
+/** Advance the round robin index until a selectable target is found.
+ *
+ * At most one full pass over the targets is made; each step advances the shared index
+ * through @c next_rr.
+ *
+ * @param now Current time.
+ * @param fail_window Failure window handed to @c HostDBInfo::select.
+ * @return The first target that accepted selection, or @c nullptr if none did.
+ */
+HostDBInfo *
+HostDBRecord::select_next_rr(ts_time now, ts_seconds fail_window)
+{
+  auto rr_info = this->rr_info();
+  for (unsigned idx = 0, limit = rr_info.count(); idx < limit; ++idx) {
+    auto &target = rr_info[this->next_rr()];
+    if (target.select(now, fail_window)) {
+      return &target;
+    }
+  }
+
+  return nullptr;
+}
+
+// Increment the round robin counter and return the new value reduced modulo @c rr_count.
+unsigned
+HostDBRecord::next_rr()
+{
+ auto raw_idx = ++_rr_idx;
+ // Modulus on an atomic is a bit tricky - need to make sure the value is always decremented by the
+ // modulus even if another thread incremented. Update to modulus value iff the value hasn't been
+ // incremented elsewhere. Eventually the "last" incrementer will do the update.
+ auto idx = raw_idx % rr_count;
+ // Store the reduced value only if the counter still holds the value this call produced.
+ _rr_idx.compare_exchange_weak(raw_idx, idx);
+ return idx;
+}
+
+/// Locate the target whose address compares equal to @a addr.
+/// @return The matching target, or @c nullptr if no target matches.
+HostDBInfo *
+HostDBRecord::find(sockaddr const *addr)
+{
+  auto targets = this->rr_info();
+  for (unsigned idx = 0, limit = targets.count(); idx < limit; ++idx) {
+    if (targets[idx].data.ip == addr) {
+      return &targets[idx];
+    }
+  }
+  return nullptr;
+}
+
+/// Resolve without a lookup when @c lookup_name is itself a literal IP address.
+/// @return @c true if this instance is resolved (previously or by this call).
+bool
+ResolveInfo::resolve_immediate()
+{
+  // Only try the literal parse if nothing has been resolved yet.
+  if (!resolved_p) {
+    IpAddr tmp;
+    if (TS_SUCCESS == tmp.load(lookup_name)) {
+      ts::bwprint(ts::bw_dbg, "[resolve_immediate] success - FQDN '{}' is a valid IP address.", lookup_name);
+      Debug("hostdb", "%s", ts::bw_dbg.c_str());
+      addr.assign(tmp);
+      resolved_p = true;
+    }
+  }
+  return resolved_p;
+}
+
+/// Make @a info the active target.
+/// @return @c true if @a info is non-null, in which case the address is taken from it and the
+/// instance is marked resolved; @c false (and marked unresolved) for a null @a info.
+bool
+ResolveInfo::set_active(HostDBInfo *info)
+{
+  active = info;
+  if (nullptr == info) {
+    resolved_p = false;
+    return false;
+  }
+  addr.assign(info->data.ip);
+  resolved_p = true;
+  return true;
+}
+
+/** Move @c active to the next live target after the current one.
+ *
+ * The search starts at the element immediately following the active one and wraps around,
+ * stopping at the first target that reports @c is_alive or when it returns to the start.
+ *
+ * @return @c true if @c active now refers to a different target, @c false if there is no
+ * active target, the record has at most one target, or no other live target was found.
+ */
+bool
+ResolveInfo::select_next_rr()
+{
+  if (active) {
+    if (auto rr_info{this->record->rr_info()}; rr_info.count() > 1) {
+      unsigned const limit = active - rr_info.data(); // index of the current target.
+      unsigned idx         = limit;
+      // Advance exactly once per check so the immediate successor is examined too.
+      do {
+        idx = (idx + 1) % rr_info.count();
+      } while (idx != limit && !rr_info[idx].is_alive());
+      active = &rr_info[idx];
+      return idx != limit; // if the active record was actually changed.
+    }
+  }
+  return false;
+}
+
+/// Use @a ip_addr as the resolved address.
+/// @return @c true if @a ip_addr is valid and was stored, @c false (no change) otherwise.
+bool
+ResolveInfo::set_upstream_address(IpAddr const &ip_addr)
+{
+  if (!ip_addr.isValid()) {
+    return false;
+  }
+  addr.assign(ip_addr);
+  resolved_p = true;
+  return true;
+}
diff --git a/iocore/hostdb/I_HostDBProcessor.h b/iocore/hostdb/I_HostDBProcessor.h
index 0be0a81b4a1..e08fde4fd54 100644
--- a/iocore/hostdb/I_HostDBProcessor.h
+++ b/iocore/hostdb/I_HostDBProcessor.h
@@ -23,10 +23,14 @@
#pragma once
+#include
+#include
+
#include "tscore/HashFNV.h"
#include "tscore/ink_time.h"
#include "tscore/CryptoHash.h"
#include "tscore/ink_align.h"
+#include "tscore/ink_inet.h"
#include "tscore/ink_resolver.h"
#include "tscore/HTTPVersion.h"
#include "I_EventSystem.h"
@@ -46,6 +50,7 @@
// Data
//
struct HostDBContinuation;
+struct ResolveInfo;
//
// The host database stores host information, most notably the
@@ -56,7 +61,7 @@ struct HostDBContinuation;
// disk representation to decrease # of seeks.
//
extern int hostdb_enable;
-extern ink_time_t hostdb_current_interval;
+extern ts_time hostdb_current_interval;
extern unsigned int hostdb_ip_stale_interval;
extern unsigned int hostdb_ip_timeout_interval;
extern unsigned int hostdb_ip_fail_timeout_interval;
@@ -84,339 +89,415 @@ makeHostHash(const char *string)
// Types
//
-/** Host information metadata used by various parts of HostDB.
- * It is stored as generic data in the cache.
- *
- * As a @c union only one of the members is valid, Which one depends on context data in the
- * @c HostDBInfo. This data is written literally to disk therefore if any change is made,
- * the @c object_version for the cache must be updated by modifying @c HostDBInfo::version.
- *
- * @see HostDBInfo::version
- */
-union HostDBApplicationInfo {
- /// Generic storage. This is verified to be the size of the union.
- struct application_data_allotment {
- unsigned int application1;
- unsigned int application2;
- } allotment;
-
- //////////////////////////////////////////////////////////
- // http server attributes in the host database //
- // //
- // http_version - one of HTTPVersion //
- // last_failure - UNIX time for the last time //
- // we tried the server & failed //
- // fail_count - Number of times we tried and //
- // and failed to contact the host //
- //////////////////////////////////////////////////////////
- struct http_server_attr {
- uint32_t last_failure;
- HTTPVersion http_version;
- uint8_t fail_count;
- http_server_attr() : http_version() {}
- } http_data;
-
- struct application_data_rr {
- unsigned int offset;
- } rr;
- HostDBApplicationInfo() : http_data() {}
-};
-
-struct HostDBRoundRobin;
+class HostDBRecord;
+/// Information for an SRV record.
struct SRVInfo {
- unsigned int srv_offset : 16;
+ unsigned int srv_offset : 16; ///< Memory offset from @c HostDBInfo to name.
unsigned int srv_weight : 16;
unsigned int srv_priority : 16;
unsigned int srv_port : 16;
unsigned int key;
};
-struct HostDBInfo : public RefCountObj {
- /** Internal IP address data.
- This is at least large enough to hold an IPv6 address.
- */
+/// Type of data stored.
+enum class HostDBType : uint8_t {
+ UNSPEC, ///< No valid data.
+ ADDR, ///< IP address.
+ SRV, ///< SRV record.
+ HOST ///< Hostname (reverse DNS)
+};
+char const *name_of(HostDBType t);
- static HostDBInfo *
- alloc(int size = 0)
- {
- size += sizeof(HostDBInfo);
- int iobuffer_index = iobuffer_size_to_index(size, hostdb_max_iobuf_index);
- ink_release_assert(iobuffer_index >= 0);
- void *ptr = ioBufAllocator[iobuffer_index].alloc_void();
- memset(ptr, 0, size);
- HostDBInfo *ret = new (ptr) HostDBInfo();
- ret->_iobuffer_index = iobuffer_index;
- return ret;
- }
+/** Information about a single target.
+ */
+struct HostDBInfo {
+ using self_type = HostDBInfo; ///< Self reference type.
- void
- free() override
- {
- ink_release_assert(from_alloc());
- Debug("hostdb", "freeing %d bytes at [%p]", (1 << (7 + _iobuffer_index)), this);
- ioBufAllocator[_iobuffer_index].free_void((void *)(this));
- }
+ /// Default constructor.
+ HostDBInfo() = default;
- /// Effectively the @c object_version for cache data.
- /// This is used to indicate incompatible changes in the binary layout of HostDB records.
- /// It must be updated if any such change is made, even if it is functionally equivalent.
- static ts::VersionNumber
- version()
- {
- /// - 1.0 Initial version.
- /// - 1.1 tweak HostDBApplicationInfo::http_data.
- return ts::VersionNumber(1, 1);
- }
+ HostDBInfo &operator=(HostDBInfo const &that);
- static HostDBInfo *
- unmarshall(char *buf, unsigned int size)
- {
- if (size < sizeof(HostDBInfo)) {
- return nullptr;
- }
- HostDBInfo *ret = HostDBInfo::alloc(size - sizeof(HostDBInfo));
- int buf_index = ret->_iobuffer_index;
- memcpy((void *)ret, buf, size);
- // Reset the refcount back to 0, this is a bit ugly-- but I'm not sure we want to expose a method
- // to mess with the refcount, since this is a fairly unique use case
- ret = new (ret) HostDBInfo();
- ret->_iobuffer_index = buf_index;
- return ret;
- }
+ /// Absolute time of when this target failed.
+ /// A value of zero (@c TS_TIME_ZERO ) indicates no failure.
+ ts_time last_fail_time() const;
- // return expiry time (in seconds since epoch)
- ink_time_t
- expiry_time() const
- {
- return ip_timestamp + ip_timeout_interval + hostdb_serve_stale_but_revalidate;
- }
+ /// Target is alive - no known failure.
+ bool is_alive();
- sockaddr *
- ip()
- {
- return &data.ip.sa;
- }
+ /// Target has failed and is still in the blocked time window.
+ bool is_dead(ts_time now, ts_seconds fail_window);
- sockaddr const *
- ip() const
- {
- return &data.ip.sa;
- }
+ /** Select this target.
+ *
+ * @param now Current time.
+ * @param fail_window Failure window.
+ * @return Status of the selection.
+ *
+ * If a zombie is selected the failure time is updated to make it look dead to other threads in a thread safe
+ * manner. The caller should check @c last_fail_time to see if a zombie was selected.
+ */
+ bool select(ts_time now, ts_seconds fail_window);
- char *hostname() const;
- char *perm_hostname() const;
- char *srvname(HostDBRoundRobin *rr) const;
+ /// Check if this info is valid.
+ bool is_valid() const;
- /// Check if this entry is an element of a round robin entry.
- /// If @c true then this entry is part of and was obtained from a round robin root. This is useful if the
- /// address doesn't work - a retry can probably get a new address by doing another lookup and resolving to
- /// a different element of the round robin.
- bool
- is_rr_elt() const
- {
- return 0 != round_robin_elt;
- }
+ /// Mark this info as invalid.
+ void invalidate();
- HostDBRoundRobin *rr();
+ /** Mark the entry as down.
+ *
+ * @param now Time of the failure.
+ * @return @c true if @a this was marked down, @c false if not.
+ *
+ * This can return @c false if the entry is already marked down, in which case the failure time is not updated.
+ */
+ bool mark_down(ts_time now);
- unsigned int
- ip_interval() const
- {
- return (hostdb_current_interval - ip_timestamp) & 0x7FFFFFFF;
- }
+ /** Mark the target as up / alive.
+ *
+ * @return Previous alive state of the target.
+ */
+ bool mark_up();
- int
- ip_time_remaining() const
- {
- return static_cast(ip_timeout_interval) - static_cast(this->ip_interval());
- }
+ char const *srvname() const;
- bool
- is_ip_stale() const
- {
- return ip_timeout_interval >= 2 * hostdb_ip_stale_interval && ip_interval() >= hostdb_ip_stale_interval;
- }
+ /** Migrate data after a DNS update.
+ *
+ * @param that Source item.
+ *
+ * This moves only specific state information, it is not a generic copy.
+ */
+ void migrate_from(self_type const &that);
- bool
- is_ip_timeout() const
- {
- return ip_interval() >= ip_timeout_interval;
- }
+ /// A target is either an IP address or an SRV record.
+ /// The kind of data stored should be indicated by @c type.
+ union {
+ IpAddr ip; ///< IP address / port data.
+ SRVInfo srv; ///< SRV record.
+ } data{IpAddr{}};
+
+ /// Data that migrates after updated DNS records are processed.
+ /// @see migrate_from
+ /// @{
+ /// Last time a failure was recorded.
+ std::atomic last_failure{TS_TIME_ZERO};
+ /// Count of connection failures
+ std::atomic fail_count{0};
+ /// Expected HTTP version of the target based on earlier transactions.
+ HTTPVersion http_version = HTTP_INVALID;
+ /// @}
+
+ self_type &assign(IpAddr const &addr);
+
+protected:
+ self_type &assign(sa_family_t af, void const *addr);
+ self_type &assign(SRV const *srv, char const *name);
+
+ HostDBType type = HostDBType::UNSPEC; ///< Invalid data.
+
+ friend HostDBContinuation;
+};
- bool
- is_ip_fail_timeout() const
- {
- return ip_interval() >= hostdb_ip_fail_timeout_interval;
+/// Assignment is a raw byte copy of the entire object (@c sizeof(*this) bytes), which
+/// includes the atomic members. Self assignment is a no-op.
+inline HostDBInfo &
+HostDBInfo::operator=(HostDBInfo const &that)
+{
+ if (this != &that) {
+ memcpy(static_cast(this), static_cast(&that), sizeof(*this));
+ }
+ return *this;
+}
- void
- refresh_ip()
- {
- ip_timestamp = hostdb_current_interval;
- }
+/// Read the recorded failure time; @c TS_TIME_ZERO is used for "no failure".
+inline ts_time
+HostDBInfo::last_fail_time() const
+{
+  return last_failure.load();
+}
- bool
- serve_stale_but_revalidate() const
- {
- // the option is disabled
- if (hostdb_serve_stale_but_revalidate <= 0) {
- return false;
- }
+/// A target is alive when no failure time is recorded for it.
+inline bool
+HostDBInfo::is_alive()
+{
+  auto const last_fail = this->last_fail_time();
+  return TS_TIME_ZERO == last_fail;
+}
- // ip_timeout_interval == DNS TTL
- // hostdb_serve_stale_but_revalidate == number of seconds
- // ip_interval() is the number of seconds between now() and when the entry was inserted
- if ((ip_timeout_interval + hostdb_serve_stale_but_revalidate) > ip_interval()) {
- Debug("hostdb", "serving stale entry %d | %d | %d as requested by config", ip_timeout_interval,
- hostdb_serve_stale_but_revalidate, ip_interval());
- return true;
- }
+inline bool
+HostDBInfo::is_dead(ts_time now, ts_seconds fail_window)
+{
+ auto last_fail = this->last_fail_time();
+ return (last_fail != TS_TIME_ZERO) && (now <= last_fail + fail_window); // dead only while a recorded failure is still inside the blackout window; once it elapses the target is a zombie (see select()).
+}
+
+inline bool
+HostDBInfo::mark_up()
+{
+ auto t = last_failure.exchange(TS_TIME_ZERO);
+ return t != TS_TIME_ZERO;
+}
+
+inline bool
+HostDBInfo::mark_down(ts_time now)
+{
+ auto t0{TS_TIME_ZERO};
+ return last_failure.compare_exchange_strong(t0, now);
+}
- // otherwise, the entry is too old
- return false;
+inline bool
+HostDBInfo::select(ts_time now, ts_seconds fail_window)
+{
+ auto t0 = this->last_fail_time();
+ if (t0 == TS_TIME_ZERO) {
+ return true; // it's alive and so is valid for selection.
}
+ // Success means this is a zombie and this thread updated the failure time.
+ return (t0 + fail_window < now) && last_failure.compare_exchange_strong(t0, now);
+}
+
+inline void
+HostDBInfo::migrate_from(HostDBInfo::self_type const &that)
+{
+ this->last_failure = that.last_failure.load();
+ this->http_version = that.http_version;
+}
- /*
- * Given the current time `now` and the fail_window, determine if this real is alive
+inline bool
+HostDBInfo::is_valid() const
+{
+ return type != HostDBType::UNSPEC;
+}
+
+inline void
+HostDBInfo::invalidate()
+{
+ type = HostDBType::UNSPEC;
+}
+
+// ----
+/** Root item for HostDB.
+ * This is the container for HostDB data. It is always an array of @c HostDBInfo instances plus metadata.
+ * All strings are C-strings and therefore don't need a distinct size.
+ *
+ */
+class HostDBRecord : public RefCountObj
+{
+ friend struct HostDBContinuation;
+ friend struct ShowHostDB;
+ using self_type = HostDBRecord;
+
+ /// Size of the IO buffer block owned by @a this.
+ /// If negative, @a this is not in allocated memory.
+ int _iobuffer_index{-1};
+ /// Actual size of the data.
+ unsigned _record_size = sizeof(self_type);
+
+public:
+ HostDBRecord() = default;
+ HostDBRecord(self_type const &that) = delete;
+
+ using Handle = Ptr; ///< Shared pointer type to hold an instance.
+
+ /** Allocate an instance from the IOBuffers.
+ *
+ * @param query_name Name of the query for the record.
+ * @param rr_count Number of info instances.
+ * @param srv_name_size Storage for SRV names, if any.
+ * @return An instance sufficient to hold the specified data.
+ *
+ * The query name will be stored and the info instances initialized.
*/
- bool
- is_alive(ink_time_t now, int32_t fail_window)
- {
- unsigned int last_failure = app.http_data.last_failure;
-
- if (last_failure == 0 || (unsigned int)(now - fail_window) > last_failure) {
- return true;
- } else {
- // Entry is marked down. Make sure some nasty clock skew
- // did not occur. Use the retry time to set an upper bound
- // as to how far in the future we should tolerate bogus last
- // failure times. This sets the upper bound that we would ever
- // consider a server down to 2*down_server_timeout
- if ((unsigned int)(now + fail_window) < last_failure) {
- app.http_data.last_failure = 0;
- return false;
- }
- return false;
- }
- }
+ static self_type *alloc(ts::TextView query_name, unsigned rr_count, size_t srv_name_size = 0);
- bool
- is_failed() const
- {
- return !((is_srv && data.srv.srv_offset) || (reverse_dns && data.hostname_offset) || ats_is_ip(ip()));
- }
+ /// Type of data stored in this record.
+ HostDBType record_type = HostDBType::UNSPEC;
- void
- set_failed()
- {
- if (is_srv) {
- data.srv.srv_offset = 0;
- } else if (reverse_dns) {
- data.hostname_offset = 0;
- } else {
- ats_ip_invalidate(ip());
- }
- }
+ /// IP family of this record.
+ sa_family_t af_family = AF_UNSPEC;
- uint64_t key{0};
+ /// Offset from @a this to the VLA.
+ unsigned short rr_offset = 0;
- // Application specific data. NOTE: We need an integral number of
- // these per block. This structure is 32 bytes. (at 200k hosts =
- // 8 Meg). Which gives us 7 bytes of application information.
- HostDBApplicationInfo app;
+ /// Number of @c HostDBInfo instances.
+ unsigned short rr_count = 0;
- union {
- IpEndpoint ip; ///< IP address / port data.
- unsigned int hostname_offset; ///< Some hostname thing.
- SRVInfo srv;
- } data;
+ /// Timing data for switch records in the RR.
+ std::atomic rr_ctime{TS_TIME_ZERO};
+
+ /// Hash key.
+ uint64_t key;
+
+ /// When the data was received.
+ ts_time ip_timestamp;
- unsigned int hostname_offset{0}; // always maintain a permanent copy of the hostname for non-rev dns records.
+ /// Valid duration of the data.
+ ts_seconds ip_timeout_interval;
- unsigned int ip_timestamp{0};
+ /** Atomically advance the round robin index.
+ *
+ * If multiple threads call this simultaneously each thread will get a distinct return value.
+ *
+ * @return The new round robin index.
+ */
+ unsigned next_rr();
+
+ /** Pick the next round robin and update the record atomically.
+ *
+ * @note This may select a zombie server and reserve it for the caller, therefore the caller must
+ * attempt to connect to the selected target if possible.
+ *
+ * @param now Current time to use for aliveness calculations.
+ * @param fail_window Blackout time for dead servers.
+ * @return Status of the updated target.
+ *
+ * If the return value is @c HostDBInfo::Status::DEAD this means all targets are dead and there is
+ * no valid upstream.
+ *
+ * @note Concurrency - this is not done under lock and depends on the caller for correct use.
+ * For strict round robin, it is a feature that every call will get a distinct index. For
+ * timed round robin, the caller must arrange to have only one thread call this per time interval.
+ */
+ HostDBInfo *select_next_rr(ts_time now, ts_seconds fail_window);
- unsigned int ip_timeout_interval{0}; // bounded between 1 and HOST_DB_MAX_TTL (0x1FFFFF, 24 days)
+ /// Check if this record is of SRV targets.
+ bool is_srv() const;
- unsigned int is_srv : 1;
- unsigned int reverse_dns : 1;
+ /** Query name for the record.
+ * @return A C-string.
+ * If this is a @c HOST record, this is the resolved name and the query was based on the IP address.
+ * Otherwise this is the name used in the DNS query.
+ */
+ char const *name() const;
+
+ /** Query name for the record.
+ * @return A view.
+ * If this is a @c HOST record, this is the resolved name and the query was based on the IP address.
+ * Otherwise this is the name used in the DNS query.
+ * @note Although not included in the view, the name is always nul terminated and the string can
+ * be used as a C-string.
+ */
+ ts::TextView name_view() const;
- unsigned int round_robin : 1; // This is the root of a round robin block
- unsigned int round_robin_elt : 1; // This is an address in a round robin block
+ /// Get the array of info instances.
+ ts::MemSpan rr_info();
- HostDBInfo() : _iobuffer_index{-1} {}
+ /** Find a host record by IP address.
+ *
+ * @param addr Address key.
+ * @return A pointer to the info instance if a match is found, @c nullptr if not.
+ */
+ HostDBInfo *find(sockaddr const *addr);
+
+ /** Select an upstream target.
+ *
+ * @param now Current time.
+ * @param fail_window Dead server blackout time.
+ * @param hash_addr Inbound remote IP address.
+ * @return A selected target, or @c nullptr if there are no valid targets.
+ *
+ * This accounts for the round robin setting. The default is to use "client affinity" in
+ * which case @a hash_addr is used as a hash seed to select the target.
+ *
+ * This may select a zombie target, which can be detected by checking the target's last
+ * failure time. If it is not @c TS_TIME_ZERO the target is a zombie. Other transactions will
+ * be blocked from selecting that target until @a fail_window time has passed.
+ *
+ * In cases other than strict round robin, a base target is selected. If valid, that is returned,
+ * but if not then the targets in this record are searched until a valid one is found. The result
+ * is this can be called to select a target for failover when a previous target fails.
+ */
+ HostDBInfo *select_best_http(ts_time now, ts_seconds fail_window, sockaddr const *hash_addr);
+ HostDBInfo *select_best_srv(char *target, InkRand *rand, ts_time now, ts_seconds fail_window);
- HostDBInfo(HostDBInfo const &src) : RefCountObj()
- {
- memcpy(static_cast(this), static_cast(&src), sizeof(*this));
- _iobuffer_index = -1;
- }
+ bool is_failed() const;
- HostDBInfo &
- operator=(HostDBInfo const &src)
- {
- if (this != &src) {
- int iob_idx = _iobuffer_index;
- memcpy(static_cast(this), static_cast(&src), sizeof(*this));
- _iobuffer_index = iob_idx;
- }
- return *this;
- }
+ void set_failed();
- bool
- from_alloc() const
- {
- return _iobuffer_index >= 0;
- }
+ /// @return The time point when the item expires.
+ ts_time expiry_time() const;
-private:
- // The value of this will be -1 for objects that are not created by the alloc() static member function.
- int _iobuffer_index;
-};
+ ts_seconds ip_interval() const;
-struct HostDBRoundRobin {
- /** Total number (to compute space used). */
- short rrcount = 0;
+ ts_seconds ip_time_remaining() const;
- /** Number which have not failed a connect. */
- short good = 0;
+ bool is_ip_stale() const;
- unsigned short current = 0;
- ink_time_t timed_rr_ctime = 0;
+ bool is_ip_timeout() const;
- // This is the equivalent of a variable length array, we can't use a VLA because
- // HostDBInfo is a non-POD type-- so this is the best we can do.
- HostDBInfo &
- info(short n)
+ bool is_ip_fail_timeout() const;
+
+ void refresh_ip();
+
+ bool serve_stale_but_revalidate() const;
+
+ /// Deallocate @a this.
+ void free() override;
+
+ /** The current round robin index.
+ *
+ * @return The current index.
+ *
+ * @note The internal index may be out of range due to concurrency constraints - this ensures the
+ * returned value is in range.
+ */
+ unsigned short rr_idx() const;
+
+ /** Offset from the current round robin index.
+ *
+ * @param delta Distance from the current index.
+ * @return The effective index.
+ */
+ unsigned short rr_idx(unsigned short delta) const;
+
+ /// The index of @a target in this record.
+ int index_of(HostDBInfo const *target) const;
+
+ /** Allocate and initialize an instance from a serialized buffer.
+ *
+ * @param buff Serialization data.
+ * @param size Size of @a buff.
+ * @return An instance initialized from @a buff.
+ */
+ static self_type *unmarshall(char *buff, unsigned size);
+
+ /// Database version.
+ static constexpr ts::VersionNumber Version{3, 0};
+
+protected:
+ /// Current active info.
+ /// @note This value may be out of range due to the difficulty of synchronization, therefore
+ /// must always be taken modulus @c rr_count when used. Use the @c rr_idx() method unless
+ /// raw access is required.
+ std::atomic _rr_idx = 0;
+
+ /** Access an internal object at @a offset.
+ *
+ * @tparam T Type of object.
+ * @param offset Offset of object.
+ * @return A pointer to the object of type @a T.
+ *
+ * @a offset is applied to @a this record and the result cast to a pointer to @a T.
+ *
+ * @note @a offset is based at @a this.
+ */
+ template
+ T *
+ apply_offset(unsigned offset)
{
- ink_assert(n < rrcount && n >= 0);
- return *((HostDBInfo *)((char *)this + sizeof(HostDBRoundRobin)) + n);
+ return reinterpret_cast(reinterpret_cast(this) + offset);
}
- // Return the allocation size of a HostDBRoundRobin struct suitable for storing
- // "count" HostDBInfo records.
- static unsigned
- size(unsigned count, unsigned srv_len = 0)
+ template
+ T const *
+ apply_offset(unsigned offset) const
{
- ink_assert(count > 0);
- return INK_ALIGN((sizeof(HostDBRoundRobin) + (count * sizeof(HostDBInfo)) + srv_len), 8);
+ return reinterpret_cast(reinterpret_cast(this) + offset);
}
- /** Find the index of @a addr in member @a info.
- @return The index if found, -1 if not found.
- */
- int index_of(sockaddr const *addr);
- HostDBInfo *find_ip(sockaddr const *addr);
- // Find the srv target
- HostDBInfo *find_target(const char *target);
- /** Select the next entry after @a addr.
- @note If @a addr isn't an address in the round robin nothing is updated.
- @return The selected entry or @c nullptr if @a addr wasn't present.
- */
- HostDBInfo *select_next(sockaddr const *addr);
- HostDBInfo *select_best_http(sockaddr const *client_ip, ink_time_t now, int32_t fail_window);
- HostDBInfo *select_best_srv(char *target, InkRand *rand, ink_time_t now, int32_t fail_window);
- HostDBRoundRobin() {}
+ union {
+ uint16_t all;
+ struct {
+ unsigned failed_p : 1; ///< DNS error.
+ } f;
+ } flags{0};
};
struct HostDBCache;
@@ -424,10 +505,123 @@ struct HostDBHash;
// Prototype for inline completion function or
// getbyname_imm()
-typedef void (Continuation::*cb_process_result_pfn)(HostDBInfo *r);
+using cb_process_result_pfn = void (Continuation::*)(HostDBRecord *r);
Action *iterate(Continuation *cont);
+/** Information for doing host resolution for a request.
+ *
+ * This is effectively a state object for a request attempting to connect upstream. Information about its attempts
+ * that is local to the request is kept here, while shared data is accessed via the @c HostDBInfo pointers.
+ *
+ * A primitive version of the IP address generator concept.
+ */
+struct ResolveInfo {
+ using self_type = ResolveInfo; ///< Self reference type.
+
+ /// Not quite sure what this is for.
+ enum UpstreamResolveStyle { UNDEFINED_LOOKUP, ORIGIN_SERVER, PARENT_PROXY, HOST_NONE };
+
+ /** Origin server address source selection.
+
+ If config says to use CTA (client target addr) state is TRY_CLIENT, otherwise it
+ remains the default. If the connect fails then we switch to a USE. We go to USE_HOSTDB if (1)
+ the HostDB lookup is successful and (2) some address other than the CTA is available to try.
+ Otherwise we keep retrying on the CTA (USE_CLIENT) up to the max retry value. In essence we
+ try to treat the CTA as if it were another RR value in the HostDB record.
+ */
+ enum class OS_Addr {
+ TRY_DEFAULT, ///< Initial state, use what config says.
+ TRY_HOSTDB, ///< Try HostDB data.
+ TRY_CLIENT, ///< Try client target addr.
+ USE_HOSTDB, ///< Force use of HostDB target address.
+ USE_CLIENT, ///< Force client target addr.
+ USE_API ///< Use the API provided address.
+ };
+
+ ResolveInfo() = default;
+ ~ResolveInfo() = default;
+
+ /// Keep a reference to the base HostDB object, so it doesn't get GC'd.
+ Ptr record;
+ HostDBInfo *active = nullptr; ///< Active host record.
+
+ /// Working address. The meaning / source of the value depends on other elements.
+ /// This is the "resolved" address if @a resolved_p is @c true.
+ IpEndpoint addr;
+
+ int attempts = 0; ///< Number of connection attempts.
+
+ char const *lookup_name = nullptr;
+ char srv_hostname[MAXDNAME] = {0};
+ const sockaddr *inbound_remote_addr = nullptr; ///< Remote address of inbound client - used for hashing.
+ in_port_t srv_port = 0; ///< Port from SRV lookup or API call.
+
+ OS_Addr os_addr_style = OS_Addr::TRY_DEFAULT;
+ HostResStyle host_res_style = HOST_RES_IPV4;
+ UpstreamResolveStyle looking_up = UNDEFINED_LOOKUP;
+
+ HTTPVersion http_version = HTTP_INVALID;
+
+ bool resolved_p = false; ///< If there is a valid, resolved address in @a addr.
+
+ /// Flag for @a addr being set externally.
+ // bool api_addr_set_p = false;
+
+ /*** Set to true by default. If use_client_target_address is set
+ * to 1, this value will be set to false if the client address is
+ * not in the DNS pool */
+ bool cta_validated_p = true;
+
+ bool set_active(HostDBInfo *info);
+
+ bool set_active(sockaddr const *s);
+
+ bool set_active(std::nullptr_t);
+
+ /** Force a resolved address.
+ *
+ * @param sa Address to use for the upstream.
+ * @return @c true if successful, @c false if error.
+ *
+ * This fails if @a sa isn't a valid IP address.
+ */
+ bool set_upstream_address(const sockaddr *sa);
+
+ bool set_upstream_address(IpAddr const &ip_addr);
+
+ void set_upstream_port(in_port_t port);
+
+ /** Check and (if possible) immediately resolve the upstream address without consulting the HostDB.
+ * The cases where this is successful are
+ * - The address is already resolved (@a resolved_p is @c true).
+ * - The upstream was set explicitly.
+ * - The hostname is a valid IP address.
+ *
+ * @return @c true if the upstream address was resolved, @c false if not.
+ */
+ bool resolve_immediate();
+
+ /** Mark the active target as dead.
+ *
+ * @param now Time of failure.
+ * @return @c true if the server was marked as dead, @c false if not.
+ *
+ */
+ bool mark_active_server_dead(ts_time now);
+
+ /** Mark the active target as alive.
+ *
+ * @return @c true if the target changed state.
+ */
+ bool mark_active_server_alive();
+
+ /// Select / resolve to the next RR entry for the record.
+ bool select_next_rr();
+
+ bool is_srv() const;
+};
+
/** The Host Database access interface. */
struct HostDBProcessor : public Processor {
friend struct HostDBSync;
@@ -486,29 +680,6 @@ struct HostDBProcessor : public Processor {
/** Lookup Hostinfo by addr */
Action *getbyaddr_re(Continuation *cont, sockaddr const *aip);
- /** Set the application information (fire-and-forget). */
- void
- setbyname_appinfo(char *hostname, int len, int port, HostDBApplicationInfo *app)
- {
- sockaddr_in addr;
- ats_ip4_set(&addr, INADDR_ANY, port);
- setby(hostname, len, ats_ip_sa_cast(&addr), app);
- }
-
- void
- setbyaddr_appinfo(sockaddr const *addr, HostDBApplicationInfo *app)
- {
- this->setby(nullptr, 0, addr, app);
- }
-
- void
- setbyaddr_appinfo(in_addr_t ip, HostDBApplicationInfo *app)
- {
- sockaddr_in addr;
- ats_ip4_set(&addr, ip);
- this->setby(nullptr, 0, ats_ip_sa_cast(&addr), app);
- }
-
/** Configuration. */
static int hostdb_strict_round_robin;
static int hostdb_timed_round_robin;
@@ -524,21 +695,151 @@ struct HostDBProcessor : public Processor {
private:
Action *getby(Continuation *cont, cb_process_result_pfn cb_process_result, HostDBHash &hash, Options const &opt);
+};
-public:
- /** Set something.
- @a aip can carry address and / or port information. If setting just
- by a port value, the address should be set to INADDR_ANY which is of
- type IPv4.
- */
- void setby(const char *hostname, ///< Hostname.
- int len, ///< Length of hostname.
- sockaddr const *aip, ///< Address and/or port.
- HostDBApplicationInfo *app ///< I don't know.
- );
+inline bool
+HostDBRecord::is_srv() const
+{
+ return HostDBType::SRV == record_type;
+}
- void setby_srv(const char *hostname, int len, const char *target, HostDBApplicationInfo *app);
-};
+inline char const *
+HostDBRecord::name() const
+{
+ return this->apply_offset(sizeof(self_type));
+}
+
+inline ts::TextView
+HostDBRecord::name_view() const
+{
+ return {this->name(), ts::TextView::npos};
+}
+
+inline ts_time
+HostDBRecord::expiry_time() const
+{
+ return ip_timestamp + ip_timeout_interval + ts_seconds(hostdb_serve_stale_but_revalidate);
+}
+
+inline ts_seconds
+HostDBRecord::ip_interval() const
+{
+ static constexpr ts_seconds ZERO{0};
+ static constexpr ts_seconds MAX{0x7FFFFFFF};
+ return std::clamp(std::chrono::duration_cast((hostdb_current_interval - ip_timestamp)), ZERO, MAX);
+}
+
+inline ts_seconds
+HostDBRecord::ip_time_remaining() const
+{
+ return ip_timeout_interval - this->ip_interval();
+}
+
+inline bool
+HostDBRecord::is_ip_stale() const
+{
+ return ip_timeout_interval >= ts_seconds(2 * hostdb_ip_stale_interval) && ip_interval() >= ts_seconds(hostdb_ip_stale_interval);
+}
+
+inline bool
+HostDBRecord::is_ip_timeout() const
+{
+ return ip_interval() >= ip_timeout_interval;
+}
+
+inline bool
+HostDBRecord::is_ip_fail_timeout() const
+{
+ return ip_interval() >= ts_seconds(hostdb_ip_fail_timeout_interval);
+}
+
+inline void
+HostDBRecord::refresh_ip()
+{
+ ip_timestamp = hostdb_current_interval;
+}
+
+inline ts::MemSpan
+HostDBRecord::rr_info()
+{
+ return {this->apply_offset(rr_offset), rr_count};
+}
+
+inline bool
+HostDBRecord::is_failed() const
+{
+ return flags.f.failed_p;
+}
+
+inline void
+HostDBRecord::set_failed()
+{
+ flags.f.failed_p = true;
+}
+
+inline unsigned short
+HostDBRecord::rr_idx() const
+{
+ return _rr_idx % rr_count;
+}
+
+inline unsigned short
+HostDBRecord::rr_idx(unsigned short delta) const
+{
+ return (_rr_idx + delta) % rr_count;
+}
+
+inline int
+HostDBRecord::index_of(HostDBInfo const *target) const
+{
+ return target ? target - this->apply_offset(rr_offset) : -1;
+}
+
+// --
+
+inline bool
+ResolveInfo::set_active(sockaddr const *s)
+{
+ return this->set_active(record->find(s));
+}
+
+inline bool
+ResolveInfo::mark_active_server_alive()
+{
+ return active != nullptr && active->mark_up(); // without an active target nothing can change state; mirrors the guard in mark_active_server_dead()
+}
+
+inline bool
+ResolveInfo::mark_active_server_dead(ts_time now)
+{
+ return active != nullptr && active->mark_down(now);
+}
+
+inline bool ResolveInfo::set_active(std::nullptr_t)
+{
+ active = nullptr;
+ resolved_p = false;
+ return false;
+}
+
+inline bool
+ResolveInfo::set_upstream_address(sockaddr const *sa)
+{
+ return resolved_p = addr.assign(sa).isValid();
+}
+
+inline void
+ResolveInfo::set_upstream_port(in_port_t port)
+{
+ srv_port = port;
+}
+
+inline bool
+ResolveInfo::is_srv() const
+{
+ return record && record->is_srv();
+}
+// ---
void run_HostDBTest();
diff --git a/iocore/hostdb/P_HostDBProcessor.h b/iocore/hostdb/P_HostDBProcessor.h
index 1dbe1a5799b..9b71e22ab1f 100644
--- a/iocore/hostdb/P_HostDBProcessor.h
+++ b/iocore/hostdb/P_HostDBProcessor.h
@@ -27,6 +27,8 @@
#pragma once
+#include
+
#include "I_HostDBProcessor.h"
#include "tscore/TsBuffer.h"
@@ -51,7 +53,7 @@ extern int hostdb_ttl_mode;
extern int hostdb_srv_enabled;
// extern int hostdb_timestamp;
-extern int hostdb_sync_frequency;
+extern ts_seconds hostdb_sync_frequency;
extern int hostdb_disable_reverse_lookup;
// Static configuration information
@@ -74,26 +76,6 @@ enum HostDBMark {
*/
extern const char *string_for(HostDBMark mark);
-inline unsigned int
-HOSTDB_CLIENT_IP_HASH(sockaddr const *lhs, sockaddr const *rhs)
-{
- unsigned int zret = ~static_cast(0);
- if (ats_ip_are_compatible(lhs, rhs)) {
- if (ats_is_ip4(lhs)) {
- in_addr_t ip1 = ats_ip4_addr_cast(lhs);
- in_addr_t ip2 = ats_ip4_addr_cast(rhs);
- zret = (ip1 >> 16) ^ ip1 ^ ip2 ^ (ip2 >> 16);
- } else if (ats_is_ip6(lhs)) {
- uint32_t const *ip1 = ats_ip_addr32_cast(lhs);
- uint32_t const *ip2 = ats_ip_addr32_cast(rhs);
- for (int i = 0; i < 4; ++i, ++ip1, ++ip2) {
- zret ^= (*ip1 >> 16) ^ *ip1 ^ *ip2 ^ (*ip2 >> 16);
- }
- }
- }
- return zret & 0xFFFF;
-}
-
//
// Constants
//
@@ -169,21 +151,12 @@ extern RecRawStatBlock *hostdb_rsb;
#define HOSTDB_DECREMENT_THREAD_DYN_STAT(_s, _t) RecIncrRawStatSum(hostdb_rsb, _t, (int)_s, -1);
-struct CmpConstBuffferCaseInsensitive {
- bool
- operator()(ts::ConstBuffer a, ts::ConstBuffer b) const
- {
- return ptr_len_casecmp(a._ptr, a._size, b._ptr, b._size) < 0;
- }
+struct HostFileRecord {
+ HostDBRecord::Handle record_4;
+ HostDBRecord::Handle record_6;
};
-// Our own typedef for the host file mapping
-typedef std::map HostsFileMap;
-// A to hold a ref-counted map
-struct RefCountedHostsFileMap : public RefCountObj {
- HostsFileMap hosts_file_map;
- ats_scoped_str HostFileText;
-};
+using HostFileMap = std::unordered_map>;
//
// HostDBCache (Private)
@@ -191,9 +164,11 @@ struct RefCountedHostsFileMap : public RefCountObj {
struct HostDBCache {
int start(int flags = 0);
// Map to contain all of the host file overrides, initialize it to empty
- Ptr hosts_file_ptr;
+ std::shared_ptr host_file;
+ std::shared_mutex host_file_mutex;
+
// TODO: make ATS call a close() method or something on shutdown (it does nothing of the sort today)
- RefCountCache *refcountcache = nullptr;
+ RefCountCache *refcountcache = nullptr;
// TODO configurable number of items in the cache
Queue *pending_dns = nullptr;
@@ -201,189 +176,9 @@ struct HostDBCache {
Queue *remoteHostDBQueue = nullptr;
HostDBCache();
bool is_pending_dns_for_hash(const CryptoHash &hash);
-};
-
-inline int
-HostDBRoundRobin::index_of(sockaddr const *ip)
-{
- bool bad = (rrcount <= 0 || (unsigned int)rrcount > hostdb_round_robin_max_count || good <= 0 ||
- (unsigned int)good > hostdb_round_robin_max_count);
- if (bad) {
- ink_assert(!"bad round robin size");
- return -1;
- }
-
- for (int i = 0; i < good; i++) {
- if (ats_ip_addr_eq(ip, info(i).ip())) {
- return i;
- }
- }
-
- return -1;
-}
-
-inline HostDBInfo *
-HostDBRoundRobin::find_ip(sockaddr const *ip)
-{
- int idx = this->index_of(ip);
- return idx < 0 ? nullptr : &info(idx);
-}
-
-inline HostDBInfo *
-HostDBRoundRobin::select_next(sockaddr const *ip)
-{
- HostDBInfo *zret = nullptr;
- if (good > 1) {
- int idx = this->index_of(ip);
- if (idx >= 0) {
- idx = (idx + 1) % good;
- zret = &info(idx);
- }
- }
- return zret;
-}
-
-inline HostDBInfo *
-HostDBRoundRobin::find_target(const char *target)
-{
- bool bad = (rrcount <= 0 || (unsigned int)rrcount > hostdb_round_robin_max_count || good <= 0 ||
- (unsigned int)good > hostdb_round_robin_max_count);
- if (bad) {
- ink_assert(!"bad round robin size");
- return nullptr;
- }
-
- uint32_t key = makeHostHash(target);
- for (int i = 0; i < good; i++) {
- if (info(i).data.srv.key == key && !strcmp(target, info(i).srvname(this)))
- return &info(i);
- }
- return nullptr;
-}
-
-inline HostDBInfo *
-HostDBRoundRobin::select_best_http(sockaddr const *client_ip, ink_time_t now, int32_t fail_window)
-{
- bool bad = (rrcount <= 0 || (unsigned int)rrcount > hostdb_round_robin_max_count || good <= 0 ||
- (unsigned int)good > hostdb_round_robin_max_count);
-
- if (bad) {
- ink_assert(!"bad round robin size");
- return nullptr;
- }
-
- int best_any = 0;
- int best_up = -1;
-
- // Basic round robin, increment current and mod with how many we have
- if (HostDBProcessor::hostdb_strict_round_robin) {
- Debug("hostdb", "Using strict round robin");
- // Check that the host we selected is alive
- for (int i = 0; i < good; i++) {
- best_any = current++ % good;
- if (info(best_any).is_alive(now, fail_window)) {
- best_up = best_any;
- break;
- }
- }
- } else if (HostDBProcessor::hostdb_timed_round_robin > 0) {
- Debug("hostdb", "Using timed round-robin for HTTP");
- if ((now - timed_rr_ctime) > HostDBProcessor::hostdb_timed_round_robin) {
- Debug("hostdb", "Timed interval expired.. rotating");
- ++current;
- timed_rr_ctime = now;
- }
- for (int i = 0; i < good; i++) {
- best_any = (current + i) % good;
- if (info(best_any).is_alive(now, fail_window)) {
- best_up = best_any;
- break;
- }
- }
- Debug("hostdb", "Using %d for best_up", best_up);
- } else {
- Debug("hostdb", "Using default round robin");
- unsigned int best_hash_any = 0;
- unsigned int best_hash_up = 0;
- for (int i = 0; i < good; i++) {
- sockaddr const *ip = info(i).ip();
- unsigned int h = HOSTDB_CLIENT_IP_HASH(client_ip, ip);
- if (best_hash_any <= h) {
- best_any = i;
- best_hash_any = h;
- }
- if (info(i).is_alive(now, fail_window)) {
- if (best_hash_up <= h) {
- best_up = i;
- best_hash_up = h;
- }
- }
- }
- }
-
- if (best_up != -1) {
- ink_assert(best_up >= 0 && best_up < good);
- return &info(best_up);
- } else {
- ink_assert(best_any >= 0 && best_any < good);
- return &info(best_any);
- }
-}
-
-inline HostDBInfo *
-HostDBRoundRobin::select_best_srv(char *target, InkRand *rand, ink_time_t now, int32_t fail_window)
-{
- bool bad = (rrcount <= 0 || (unsigned int)rrcount > hostdb_round_robin_max_count || good <= 0 ||
- (unsigned int)good > hostdb_round_robin_max_count);
-
- if (bad) {
- ink_assert(!"bad round robin size");
- return nullptr;
- }
-
-#ifdef DEBUG
- for (int i = 1; i < good; ++i) {
- ink_assert(info(i).data.srv.srv_priority >= info(i - 1).data.srv.srv_priority);
- }
-#endif
-
- int i = 0;
- int len = 0;
- uint32_t weight = 0, p = INT32_MAX;
- HostDBInfo *result = nullptr;
- HostDBInfo *infos[good];
- do {
- // if the real isn't alive-- exclude it from selection
- if (!info(i).is_alive(now, fail_window)) {
- continue;
- }
-
- if (info(i).data.srv.srv_priority <= p) {
- p = info(i).data.srv.srv_priority;
- weight += info(i).data.srv.srv_weight;
- infos[len++] = &info(i);
- } else
- break;
- } while (++i < good);
-
- if (len == 0) { // all failed
- result = &info(current++ % good);
- } else if (weight == 0) { // srv weight is 0
- result = infos[current++ % len];
- } else {
- uint32_t xx = rand->random() % weight;
- for (i = 0; i < len - 1 && xx >= infos[i]->data.srv.srv_weight; ++i)
- xx -= infos[i]->data.srv.srv_weight;
-
- result = infos[i];
- }
- if (result) {
- ink_strlcpy(target, result->srvname(this), MAXDNAME);
- return result;
- }
- return nullptr;
-}
+ std::shared_ptr acquire_host_file();
+};
//
// Types
@@ -397,10 +192,9 @@ struct HostDBHash {
CryptoHash hash; ///< The hash value.
- const char *host_name = nullptr; ///< Host name.
- int host_len = 0; ///< Length of @a _host_name
- IpAddr ip; ///< IP address.
- in_port_t port = 0; ///< IP port (host order).
+ ts::TextView host_name; ///< Name of the host for the query.
+ IpAddr ip; ///< IP address.
+ in_port_t port = 0; ///< IP port (host order).
/// DNS server. Not strictly part of the hash data but
/// it's both used by @c HostDBContinuation and provides access to
/// hash data. It's just handier to store it here for both uses.
@@ -417,20 +211,23 @@ struct HostDBHash {
/** Assign a hostname.
This updates the split DNS data as well.
*/
- self &set_host(const char *name, int len);
+ self &set_host(ts::TextView name);
+ self &
+ set_host(char const *name)
+ {
+ return this->set_host(ts::TextView{name, strlen(name)});
+ }
};
//
// Handles a HostDB lookup request
//
-struct HostDBContinuation;
-typedef int (HostDBContinuation::*HostDBContHandler)(int, void *);
+using HostDBContHandler = int (HostDBContinuation::*)(int, void *);
struct HostDBContinuation : public Continuation {
Action action;
HostDBHash hash;
- // IpEndpoint ip;
- unsigned int ttl = 0;
+ ts_seconds ttl{0};
// HostDBMark db_mark; ///< Target type.
/// Original IP address family style. Note this will disagree with
/// @a hash.db_mark when doing a retry on an alternate family. The retry
@@ -439,9 +236,8 @@ struct HostDBContinuation : public Continuation {
int dns_lookup_timeout = DEFAULT_OPTIONS.timeout;
Event *timeout = nullptr;
Continuation *from_cont = nullptr;
- HostDBApplicationInfo app;
- int probe_depth = 0;
- size_t current_iterate_pos = 0;
+ int probe_depth = 0;
+ size_t current_iterate_pos = 0;
// char name[MAXDNAME];
// int namelen;
char hash_host_name_store[MAXDNAME + 1]; // used as backing store for @a hash
@@ -451,7 +247,6 @@ struct HostDBContinuation : public Continuation {
unsigned int missing : 1;
unsigned int force_dns : 1;
- unsigned int round_robin : 1;
int probeEvent(int event, Event *e);
int iterateEvent(int event, Event *e);
@@ -474,19 +269,23 @@ struct HostDBContinuation : public Continuation {
{
return hash.db_mark == HOSTDB_MARK_SRV;
}
- HostDBInfo *lookup_done(IpAddr const &ip, const char *aname, bool round_robin, unsigned int attl, SRVHosts *s = nullptr,
- HostDBInfo *r = nullptr);
+
+ Ptr
+ lookup_done(const char *query_name, ts_seconds answer_ttl, SRVHosts *s = nullptr, Ptr record = Ptr{})
+ {
+ return this->lookup_done(ts::TextView{query_name, strlen(query_name)}, answer_ttl, s, record);
+ }
+
+ Ptr lookup_done(ts::TextView query_name, ts_seconds answer_ttl, SRVHosts *s = nullptr,
+ Ptr record = Ptr{});
+
int key_partition();
- void remove_trigger_pending_dns();
+ void remove_and_trigger_pending_dns();
int set_check_pending_dns();
- HostDBInfo *insert(unsigned int attl);
-
/** Optional values for @c init.
*/
struct Options {
- typedef Options self; ///< Self reference type.
-
int timeout = 0; ///< Timeout value. Default 0
HostResStyle host_res_style = HOST_RES_NONE; ///< IP address family fallback. Default @c HOST_RES_NONE
bool force_dns = false; ///< Force DNS lookup. Default @c false
@@ -499,7 +298,7 @@ struct HostDBContinuation : public Continuation {
int make_get_message(char *buf, int len);
int make_put_message(HostDBInfo *r, Continuation *c, char *buf, int len);
- HostDBContinuation() : missing(false), force_dns(DEFAULT_OPTIONS.force_dns), round_robin(false)
+ HostDBContinuation() : missing(false), force_dns(DEFAULT_OPTIONS.force_dns)
{
ink_zero(hash_host_name_store);
ink_zero(hash.hash);
@@ -513,12 +312,6 @@ master_hash(CryptoHash const &hash)
return static_cast(hash[1] >> 32);
}
-inline bool
-is_dotted_form_hostname(const char *c)
-{
- return -1 != (int)ink_inet_addr(c);
-}
-
inline Queue &
HostDBCache::pending_dns_for_hash(const CryptoHash &hash)
{
diff --git a/iocore/net/ALPNSupport.cc b/iocore/net/ALPNSupport.cc
index 69b8b1c24d7..1e9bc8aacbe 100644
--- a/iocore/net/ALPNSupport.cc
+++ b/iocore/net/ALPNSupport.cc
@@ -57,11 +57,10 @@ ALPNSupport::unbind(SSL *ssl)
void
ALPNSupport::clear()
{
- if (npn) {
- ats_free(npn);
- npn = nullptr;
- npnsz = 0;
- }
+ ats_free(npn);
+ npn = nullptr;
+ npnsz = 0;
+
npnSet = nullptr;
npnEndpoint = nullptr;
}
diff --git a/iocore/net/Makefile.am b/iocore/net/Makefile.am
index 0f8e6d28e7d..da278b8b864 100644
--- a/iocore/net/Makefile.am
+++ b/iocore/net/Makefile.am
@@ -154,6 +154,7 @@ libinknet_a_SOURCES = \
P_SSLUtils.h \
P_SSLClientCoordinator.h \
P_SSLClientUtils.h \
+ P_TLSKeyLogger.h \
P_OCSPStapling.h \
P_UDPConnection.h \
P_UDPIOEvent.h \
@@ -189,6 +190,7 @@ libinknet_a_SOURCES = \
OCSPStapling.cc \
TLSBasicSupport.cc \
TLSEarlyDataSupport.cc \
+ TLSKeyLogger.cc \
TLSSessionResumptionSupport.cc \
TLSSNISupport.cc \
TLSTunnelSupport.cc \
diff --git a/iocore/net/OCSPStapling.cc b/iocore/net/OCSPStapling.cc
index 252fda1e09c..da5a8c16356 100644
--- a/iocore/net/OCSPStapling.cc
+++ b/iocore/net/OCSPStapling.cc
@@ -48,6 +48,7 @@ struct certinfo {
OCSP_CERTID *cid; // Certificate ID for OCSP requests or nullptr if ID cannot be determined
char *uri; // Responder details
char *certname;
+ char *user_agent; // User-Agent header value for OCSP requests; the header is omitted when nullptr
ink_mutex stapling_mutex;
unsigned char resp_der[MAX_STAPLING_DER];
unsigned int resp_derlen;
@@ -72,14 +73,16 @@ certinfo_map_free(void * /*parent*/, void *ptr, CRYPTO_EX_DATA * /*ad*/, int /*i
}
for (certinfo_map::iterator iter = map->begin(); iter != map->end(); ++iter) {
- if (iter->second->uri) {
- OPENSSL_free(iter->second->uri);
+ certinfo *cinf = iter->second;
+ if (cinf->uri) {
+ OPENSSL_free(cinf->uri);
}
- if (iter->second->certname) {
- ats_free(iter->second->certname);
- }
- ink_mutex_destroy(&iter->second->stapling_mutex);
- OPENSSL_free(iter->second);
+
+ ats_free(cinf->certname);
+ ats_free(cinf->user_agent);
+
+ ink_mutex_destroy(&cinf->stapling_mutex);
+ OPENSSL_free(cinf);
}
delete map;
}
@@ -211,9 +214,12 @@ ssl_stapling_init_cert(SSL_CTX *ctx, X509 *cert, const char *certname, const cha
}
// Initialize certinfo
- cinf->cid = nullptr;
- cinf->uri = nullptr;
- cinf->certname = ats_strdup(certname);
+ cinf->cid = nullptr;
+ cinf->uri = nullptr;
+ cinf->certname = ats_strdup(certname);
+ // Always assign user_agent, even when no agent string is configured: it is handed to ats_free() on this
+ // function's error path and in certinfo_map_free(), so it must never be left indeterminate.
+ cinf->user_agent = SSLConfigParams::ssl_ocsp_user_agent != nullptr ? ats_strdup(SSLConfigParams::ssl_ocsp_user_agent) : nullptr;
cinf->resp_derlen = 0;
ink_mutex_init(&cinf->stapling_mutex);
cinf->is_prefetched = rsp_file ? true : false;
@@ -287,9 +293,8 @@ ssl_stapling_init_cert(SSL_CTX *ctx, X509 *cert, const char *certname, const cha
OCSP_CERTID_free(cinf->cid);
}
- if (cinf->certname) {
- ats_free(cinf->certname);
- }
+ ats_free(cinf->certname);
+ ats_free(cinf->user_agent);
if (cinf) {
OPENSSL_free(cinf);
@@ -368,7 +373,7 @@ stapling_check_response(certinfo *cinf, OCSP_RESPONSE *rsp)
}
static OCSP_RESPONSE *
-query_responder(BIO *b, char *host, char *path, OCSP_REQUEST *req, int req_timeout)
+query_responder(BIO *b, const char *host, const char *path, const char *user_agent, OCSP_REQUEST *req, int req_timeout)
{
ink_hrtime start, end;
OCSP_RESPONSE *resp = nullptr;
@@ -380,6 +385,9 @@ query_responder(BIO *b, char *host, char *path, OCSP_REQUEST *req, int req_timeo
ctx = OCSP_sendreq_new(b, path, nullptr, -1);
OCSP_REQ_CTX_add1_header(ctx, "Host", host);
+ if (user_agent != nullptr) {
+ OCSP_REQ_CTX_add1_header(ctx, "User-Agent", user_agent);
+ }
OCSP_REQ_CTX_set1_req(ctx, req);
do {
@@ -399,7 +407,7 @@ query_responder(BIO *b, char *host, char *path, OCSP_REQUEST *req, int req_timeo
}
static OCSP_RESPONSE *
-process_responder(OCSP_REQUEST *req, char *host, char *path, char *port, int req_timeout)
+process_responder(OCSP_REQUEST *req, const char *host, const char *path, const char *port, const char *user_agent, int req_timeout)
{
BIO *cbio = nullptr;
OCSP_RESPONSE *resp = nullptr;
@@ -416,7 +424,7 @@ process_responder(OCSP_REQUEST *req, char *host, char *path, char *port, int req
Debug("ssl_ocsp", "process_responder: failed to connect to OCSP server; host=%s port=%s path=%s", host, port, path);
goto end;
}
- resp = query_responder(cbio, host, path, req, req_timeout);
+ resp = query_responder(cbio, host, path, user_agent, req, req_timeout);
end:
if (cbio) {
@@ -456,7 +464,7 @@ stapling_refresh_response(certinfo *cinf, OCSP_RESPONSE **prsp)
goto err;
}
- *prsp = process_responder(req, host, path, port, SSLConfigParams::ssl_ocsp_request_timeout);
+ *prsp = process_responder(req, host, path, port, cinf->user_agent, SSLConfigParams::ssl_ocsp_request_timeout);
if (*prsp == nullptr) {
goto done;
}
diff --git a/iocore/net/P_QUICNextProtocolAccept.h b/iocore/net/P_QUICNextProtocolAccept.h
index 95752dd21e0..3ca44db5506 100644
--- a/iocore/net/P_QUICNextProtocolAccept.h
+++ b/iocore/net/P_QUICNextProtocolAccept.h
@@ -33,7 +33,7 @@ class QUICNextProtocolAccept : public SessionAccept
QUICNextProtocolAccept();
~QUICNextProtocolAccept();
- bool accept(NetVConnection *, MIOBuffer *, IOBufferReader *);
+ bool accept(NetVConnection *, MIOBuffer *, IOBufferReader *) override;
// Register handler as an endpoint for the specified protocol. Neither
// handler nor protocol are copied, so the caller must guarantee their
@@ -50,7 +50,7 @@ class QUICNextProtocolAccept : public SessionAccept
QUICNextProtocolAccept &operator=(const QUICNextProtocolAccept &) = delete; // disabled
private:
- int mainEvent(int event, void *netvc);
+ int mainEvent(int event, void *netvc) override;
SSLNextProtocolSet protoset;
SessionProtocolSet protoenabled;
diff --git a/iocore/net/P_SSLConfig.h b/iocore/net/P_SSLConfig.h
index 2018c34ea95..99ebf9db77a 100644
--- a/iocore/net/P_SSLConfig.h
+++ b/iocore/net/P_SSLConfig.h
@@ -124,6 +124,7 @@ struct SSLConfigParams : public ConfigInfo {
static int ssl_ocsp_update_period;
static int ssl_handshake_timeout_in;
char *ssl_ocsp_response_path_only;
+ static char *ssl_ocsp_user_agent;
static int origin_session_cache;
static size_t origin_session_cache_size;
@@ -213,7 +214,7 @@ struct SSLTicketParams : public ConfigInfo {
time_t load_time = 0;
char *ticket_key_filename;
bool LoadTicket(bool &nochange);
- void LoadTicketData(char *ticket_data, int ticket_data_len);
+ bool LoadTicketData(char *ticket_data, int ticket_data_len);
void cleanup();
~SSLTicketParams() override { cleanup(); }
diff --git a/iocore/net/P_SSLUtils.h b/iocore/net/P_SSLUtils.h
index dc8c3e80c34..b8ca3b81284 100644
--- a/iocore/net/P_SSLUtils.h
+++ b/iocore/net/P_SSLUtils.h
@@ -35,9 +35,7 @@
#include "P_SSLCertLookup.h"
#include