From 871d2fade89642225f72d587446e84ae89d7d532 Mon Sep 17 00:00:00 2001 From: Masaori Koshiba Date: Mon, 26 Dec 2022 10:46:43 +0900 Subject: [PATCH 1/9] Add BRAVO Reader-Writer Lock --- .gitignore | 1 + NOTICE | 5 + include/tscpp/util/Bravo.h | 382 ++++++++++++++++++ src/tscpp/util/Makefile.am | 6 + .../util/unit_tests/benchmark_shared_mutex.cc | 130 ++++++ src/tscpp/util/unit_tests/test_Bravo.cc | 192 +++++++++ 6 files changed, 716 insertions(+) create mode 100644 include/tscpp/util/Bravo.h create mode 100644 src/tscpp/util/unit_tests/benchmark_shared_mutex.cc create mode 100644 src/tscpp/util/unit_tests/test_Bravo.cc diff --git a/.gitignore b/.gitignore index f676c43dc88..4e7545d62d0 100644 --- a/.gitignore +++ b/.gitignore @@ -99,6 +99,7 @@ src/tscore/test_geometry src/tscore/test_Regex src/tscore/test_X509HostnameValidator src/tscore/test_tscore +src/tscpp/util/benchmark_shared_mutex src/tscpp/util/test_tscpputil src/records/test_librecords src/records/test_librecords_on_eventsystem diff --git a/NOTICE b/NOTICE index 949f852011e..c661045a582 100644 --- a/NOTICE +++ b/NOTICE @@ -96,3 +96,8 @@ https://github.com/jbeder/yaml-cpp fastlz: an ANSI C/C90 implementation of Lempel-Ziv 77 algorithm (LZ77) of lossless data compression. https://github.com/ariya/FastLZ + +~~ + +include/tscpp/util/Bravo.h is C++ version of puzpuzpuz/xsync's RBMutex +Copyright (c) 2021 Andrey Pechkurov diff --git a/include/tscpp/util/Bravo.h b/include/tscpp/util/Bravo.h new file mode 100644 index 00000000000..0438f71a8c5 --- /dev/null +++ b/include/tscpp/util/Bravo.h @@ -0,0 +1,382 @@ +/** @file + + Implementation of BRAVO - Biased Locking for Reader-Writer Locks + + Dave Dice and Alex Kogan. 2019. BRAVO: Biased Locking for Reader-Writer Locks. + In Proceedings of the 2019 USENIX Annual Technical Conference (ATC). USENIX Association, Renton, WA, 315–328. + + https://www.usenix.org/conference/atc19/presentation/dice + + > Section 3. + > BRAVO acts as an accelerator layer, as readers can always fall back to the traditional underlying lock to gain read access. + > ... + > Write performance and the scalability of read-vs-write and write-vs-write behavior depends solely on the underlying lock. + + This code is C++ version of puzpuzpuz/xsync's RBMutex + https://github.com/puzpuzpuz/xsync/blob/main/rbmutex.go + Copyright (c) 2021 Andrey Pechkurov + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace ts::bravo +{ +static inline uint32_t +mix32(uint64_t z) +{ + z = (z ^ (z >> 33)) * 0xff51afd7ed558ccdL; + z = (z ^ (z >> 33)) * 0xc4ceb9fe1a85ec53L; + return static_cast(z >> 32); +} + +using time_point = std::chrono::time_point; + +#ifdef __cpp_lib_hardware_interference_size +using std::hardware_constructive_interference_size; +#else +// 64 bytes on x86-64 │ L1_CACHE_BYTES │ L1_CACHE_SHIFT │ __cacheline_aligned │ ... +constexpr std::size_t hardware_constructive_interference_size = 64; +#endif + +/** + ts::bravo::Token + + Token for readers. + 0 is special value that represents inital/invalid value. + */ +using Token = size_t; + +/** + ts::bravo::shared_lock + */ +template class shared_lock +{ +public: + using mutex_type = Mutex; + + shared_lock() = default; + shared_lock(Mutex &m) : _mutex(&m) { lock(); } + + ~shared_lock() + { + if (_owns) { + _mutex->unlock_shared(_token); + } + }; + + //// + // Not Copyable + // + shared_lock(shared_lock const &) = delete; + shared_lock &operator=(shared_lock const &) = delete; + + //// + // Moveable + // + shared_lock(shared_lock &&s) : _mutex(s._mutex), _token(s._token), _owns(s._owns) + { + s._mutex = nullptr; + s._token = 0; + s._owns = false; + }; + + shared_lock & + operator=(shared_lock &&s) + { + if (_owns) { + _mutex->unlock_shared(_token); + } + _mutex = s._mutex; + _token = s._token; + _owns = s._owns; + + s._mutex = nullptr; + s._token = 0; + s._owns = false; + }; + + //// + // Shared locking + // + void + lock() + { + _mutex->lock_shared(_token); + _owns = true; + } + + bool + try_lock() + { + _owns = _mutex->try_lock_shared(_token); + return _owns; + } + + // not implemented yet + bool try_lock_for() = delete; + bool try_lock_until() = delete; + + void + unlock() + { + _mutex->unlock_shared(_token); + _owns = false; + } + + //// + // Modifiers + // + void + swap(shared_lock &s) + { + std::swap(_mutex, s._mutex); + std::swap(_token, s._token); + std::swap(_owns, s._owns); + } + + mutex_type * + release() + { + mutex_type *m = _mutex; + _mutex = nullptr; + _token = 0; + _owns = false; + return m; + } + + //// + // Observers + // + mutex_type * + mutex() + { + return _mutex; + } + + Token + token() + { + return _token; + } + + bool + owns_lock() + { + return _owns; + } + +private: + mutex_type *_mutex = nullptr; + Token _token = 0; + bool _owns = false; +}; + +/** + ts::bravo::shared_mutex + + You can use std::lock_guard for writers but, you can't use std::shared_lock for readers to handle ts::bravo::Token. + Use ts::bravo::shared_lock for readers. + + SLOT_SIZE needs to be larget than number of threads for lock_shared to go fast-path. + */ +template class shared_mutex_impl +{ +public: + shared_mutex_impl() = default; + ~shared_mutex_impl() = default; + + //// + // No copying or moving. + // + shared_mutex_impl(shared_mutex_impl const &) = delete; + shared_mutex_impl &operator=(shared_mutex_impl const &) = delete; + + shared_mutex_impl(shared_mutex_impl &&) = delete; + shared_mutex_impl &operator=(shared_mutex_impl &&) = delete; + + //// + // Exclusive locking + // + void + lock() + { + _mutex.underlying.lock(); + _revoke(); + } + + bool + try_lock() + { + bool r = _mutex.underlying.try_lock(); + if (!r) { + return false; + } + + _revoke(); + + return true; + } + + void + unlock() + { + _mutex.underlying.unlock(); + } + + //// + // Shared locking + // + void + lock_shared(Token &token) + { + // Fast path + if (_mutex.read_bias.load(std::memory_order_acquire)) { + size_t index = _starting_point(); + for (size_t i = 0; i < SLOT_SIZE; ++i) { + index = (index + i) % SLOT_SIZE; + Slot &slot = _mutex.readers[index]; + bool expect = false; + if (slot.mu.compare_exchange_strong(expect, true, std::memory_order_relaxed)) { + // recheck + if (_mutex.read_bias.load(std::memory_order_acquire)) { + token = index + 1; + return; + } else { + slot.mu.store(false, std::memory_order_relaxed); + } + } + } + } + + // Slow path + _mutex.underlying.lock_shared(); + if (_mutex.read_bias.load(std::memory_order_acquire) == false && _now() >= _mutex.inhibit_until) { + _mutex.read_bias.store(true, std::memory_order_release); + } + } + + bool + try_lock_shared(Token &token) + { + // Fast path + if (_mutex.read_bias.load(std::memory_order_acquire)) { + size_t index = _starting_point(); + for (size_t i = 0; i < SLOT_SIZE; ++i) { + index += i; + Slot &slot = _mutex.readers[index]; + bool expect = false; + if (slot.mu.compare_exchange_weak(expect, true, std::memory_order_release, std::memory_order_relaxed)) { + // recheck + if (_mutex.read_bias.load(std::memory_order_acquire)) { + token = index + 1; + return true; + } else { + slot.mu.store(false, std::memory_order_relaxed); + } + } + } + } + + // Slow path + bool r = _mutex.underlying.try_lock_shared(); + if (r && _mutex.read_bias.load(std::memory_order_acquire) == false && _now() >= _mutex.inhibit_until) { + _mutex.read_bias.store(true, std::memory_order_release); + return true; + } + + return false; + } + + void + unlock_shared(const Token token) + { + if (token == 0) { + _mutex.underlying.unlock_shared(); + return; + } + + Slot &slot = _mutex.readers[token - 1]; + slot.mu.store(0, std::memory_order_relaxed); + } + +private: + struct alignas(hardware_constructive_interference_size) Slot { + std::atomic mu = false; + }; + + struct Mutex { + std::atomic read_bias = false; + std::array readers = {}; + time_point inhibit_until{}; + T underlying; + }; + + time_point + _now() + { + return std::chrono::system_clock::now(); + } + + size_t + _starting_point() + { + std::hash hasher; + return mix32(hasher(std::this_thread::get_id())) % SLOT_SIZE; + } + + /** + Disable read bias and do revocation + */ + void + _revoke() + { + if (!_mutex.read_bias.load(std::memory_order_acquire)) { + // do nothing + return; + } + + _mutex.read_bias.store(false, std::memory_order_release); + time_point start = _now(); + for (size_t i = 0; i < SLOT_SIZE; ++i) { + for (int j = 0; _mutex.readers[i].mu.load(std::memory_order_relaxed); ++j) { + std::this_thread::sleep_for(std::chrono::nanoseconds(1 << j)); + } + } + time_point n = _now(); + _mutex.inhibit_until = n + ((n - start) * SLOWDOWN_GUARD); + } + + //// + // Variables + // + Mutex _mutex; +}; + +using shared_mutex = shared_mutex_impl<>; + +} // namespace ts::bravo diff --git a/src/tscpp/util/Makefile.am b/src/tscpp/util/Makefile.am index 66dffc2cc3e..2fd87a821d3 100644 --- a/src/tscpp/util/Makefile.am +++ b/src/tscpp/util/Makefile.am @@ -19,6 +19,7 @@ include $(top_srcdir)/build/tidy.mk check_PROGRAMS = test_tscpputil +noinst_PROGRAMS = benchmark_shared_mutex TESTS = $(check_PROGRAMS) @@ -39,12 +40,17 @@ test_tscpputil_CXXFLAGS = -Wno-array-bounds $(AM_CXXFLAGS) test_tscpputil_LDADD = libtscpputil.la @SWOC_LIBS@ test_tscpputil_SOURCES = \ unit_tests/unit_test_main.cc \ + unit_tests/test_Bravo.cc \ unit_tests/test_LocalBuffer.cc \ unit_tests/test_PostScript.cc \ unit_tests/test_Strerror.cc \ unit_tests/test_TextView.cc \ unit_tests/test_ts_meta.cc +benchmark_shared_mutex_CPPFLAGS = $(AM_CPPFLAGS) -I$(abs_top_srcdir)/tests/include +benchmark_shared_mutex_CXXFLAGS = -Wno-array-bounds $(AM_CXXFLAGS) +benchmark_shared_mutex_SOURCES = unit_tests/benchmark_shared_mutex.cc + clean-local: rm -f ParseRulesCType ParseRulesCTypeToLower ParseRulesCTypeToUpper diff --git a/src/tscpp/util/unit_tests/benchmark_shared_mutex.cc b/src/tscpp/util/unit_tests/benchmark_shared_mutex.cc new file mode 100644 index 00000000000..a8bb22f4876 --- /dev/null +++ b/src/tscpp/util/unit_tests/benchmark_shared_mutex.cc @@ -0,0 +1,130 @@ +/** @file + + Micro Benchmark tool for shared_mutex - requires Catch2 v2.9.0+ + + - e.g. example of running 64 threads with read/write rate is 100:1 + ``` + $ taskset -c 0-63 ./benchmark_shared_mutex --ts-nthreads 64 --ts-nloop 1000 --ts-nread 100 --ts-nwrite 1 + ``` + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ + +#define CATCH_CONFIG_ENABLE_BENCHMARKING +#define CATCH_CONFIG_RUNNER + +#include "catch.hpp" + +#include "tscpp/util/Bravo.h" + +#include +#include +#include + +namespace +{ +// Args +struct Conf { + int nloop = 1; + int nthreads = 1; + int nread = 1; + int nwrite = 1; +}; + +Conf conf; +thread_local int counter = 0; + +template +void +run(T &mutex) +{ + std::thread list[conf.nthreads]; + + for (int i = 0; i < conf.nthreads; i++) { + new (&list[i]) std::thread{[](T &mutex) { + for (int j = 0; j < conf.nloop; ++j) { + // reader + for (int i = 0; i < conf.nread; ++i) { + S lock(mutex); + // Do not optimize + ++counter; + } + + // writer + for (int i = 0; i < conf.nwrite; ++i) { + std::lock_guard lock(mutex); + // Do not optimize + ++counter; + } + } + }, + std::ref(mutex)}; + } + + for (int i = 0; i < conf.nthreads; i++) { + list[i].join(); + } +} + +} // namespace + +TEST_CASE("Micro benchmark of shared_mutex", "") +{ + SECTION("std::shared_mutex") + { + BENCHMARK("std::shared_mutex", ) + { + std::shared_mutex mutex; + + run>(mutex); + }; + } + + SECTION("ts::bravo::shared_mutex") + { + BENCHMARK("ts::bravo::shared_mutex") + { + ts::bravo::shared_mutex mutex; + run>(mutex); + }; + } +} + +int +main(int argc, char *argv[]) +{ + Catch::Session session; + + using namespace Catch::clara; + + // clang-format off + auto cli = session.cli() | + Opt(conf.nthreads, "")["--ts-nthreads"]("number of threads (default: 1)") | + Opt(conf.nread, "")["--ts-nread"]("number of read op (default: 1)") | + Opt(conf.nwrite, "")["--ts-nwrite"]("number of write op (default: 1)") | + Opt(conf.nloop, "")["--ts-nloop"]("number of read-write loop (default: 1)"); + // clang-format on + + session.cli(cli); + + int returnCode = session.applyCommandLine(argc, argv); + if (returnCode != 0) { + return returnCode; + } + + return session.run(); +} diff --git a/src/tscpp/util/unit_tests/test_Bravo.cc b/src/tscpp/util/unit_tests/test_Bravo.cc new file mode 100644 index 00000000000..0cb7a5105c7 --- /dev/null +++ b/src/tscpp/util/unit_tests/test_Bravo.cc @@ -0,0 +1,192 @@ +/** @file + + Unit tests for BRAVO + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ + +#include "catch.hpp" +#include "tscpp/util/Bravo.h" + +#include +#include +#include +#include + +using namespace std::chrono_literals; + +TEST_CASE("BRAVO - simple check with try-lock", "[libts][BRAVO]") +{ + SECTION("reader-reader") + { + ts::bravo::shared_mutex mutex; + ts::bravo::shared_lock lock(mutex); + CHECK(lock.owns_lock() == true); + + std::thread t{[](ts::bravo::shared_mutex &mutex) { + ts::bravo::Token token{0}; + CHECK(mutex.try_lock_shared(token) == true); + mutex.unlock_shared(token); + }, + std::ref(mutex)}; + + t.join(); + } + + SECTION("reader-writer") + { + ts::bravo::shared_mutex mutex; + ts::bravo::shared_lock lock(mutex); + CHECK(lock.owns_lock() == true); + + std::thread t{[](ts::bravo::shared_mutex &mutex) { CHECK(mutex.try_lock() == false); }, std::ref(mutex)}; + + t.join(); + } + + SECTION("writer-reader") + { + ts::bravo::shared_mutex mutex; + std::lock_guard lock(mutex); + + std::thread t{[](ts::bravo::shared_mutex &mutex) { + ts::bravo::Token token{0}; + CHECK(mutex.try_lock_shared(token) == false); + CHECK(token == 0); + }, + std::ref(mutex)}; + + t.join(); + } + + SECTION("writer-writer") + { + ts::bravo::shared_mutex mutex; + std::lock_guard lock(mutex); + + std::thread t{[](ts::bravo::shared_mutex &mutex) { CHECK(mutex.try_lock() == false); }, std::ref(mutex)}; + + t.join(); + } +} + +TEST_CASE("BRAVO - check with race", "[libts][BRAVO]") +{ + SECTION("reader-reader") + { + ts::bravo::shared_mutex mutex; + int i = 0; + + std::thread t1{[&](ts::bravo::shared_mutex &mutex) { + ts::bravo::shared_lock lock(mutex); + CHECK(lock.owns_lock() == true); + ++i; + }, + std::ref(mutex)}; + + std::thread t2{[&](ts::bravo::shared_mutex &mutex) { + ts::bravo::shared_lock lock(mutex); + CHECK(lock.owns_lock() == true); + ++i; + }, + std::ref(mutex)}; + + t1.join(); + t2.join(); + + CHECK(i == 2); + } + + SECTION("reader-writer") + { + ts::bravo::shared_mutex mutex; + int i = 0; + + std::thread t1{[&](ts::bravo::shared_mutex &mutex) { + ts::bravo::shared_lock lock(mutex); + CHECK(lock.owns_lock() == true); + std::this_thread::sleep_for(100ms); + ++i; + }, + std::ref(mutex)}; + + std::thread t2{[&](ts::bravo::shared_mutex &mutex) { + std::this_thread::sleep_for(50ms); + std::lock_guard lock(mutex); + ++i; + }, + std::ref(mutex)}; + + t1.join(); + t2.join(); + + CHECK(i == 2); + } + + SECTION("writer-reader") + { + ts::bravo::shared_mutex mutex; + int i = 0; + + std::thread t1{[&](ts::bravo::shared_mutex &mutex) { + std::lock_guard lock(mutex); + std::this_thread::sleep_for(100ms); + ++i; + }, + std::ref(mutex)}; + + std::thread t2{[&](ts::bravo::shared_mutex &mutex) { + std::this_thread::sleep_for(50ms); + ts::bravo::shared_lock lock(mutex); + CHECK(lock.owns_lock() == true); + ++i; + }, + std::ref(mutex)}; + + t1.join(); + t2.join(); + + CHECK(i == 2); + } + + SECTION("writer-writer") + { + ts::bravo::shared_mutex mutex; + int i = 0; + + std::thread t1{[&](ts::bravo::shared_mutex &mutex) { + std::lock_guard lock(mutex); + std::this_thread::sleep_for(100ms); + ++i; + }, + std::ref(mutex)}; + + std::thread t2{[&](ts::bravo::shared_mutex &mutex) { + std::this_thread::sleep_for(50ms); + std::lock_guard lock(mutex); + ++i; + }, + std::ref(mutex)}; + + t1.join(); + t2.join(); + + CHECK(i == 2); + } +} From f9282038fad66033b260f760ba990b0cdab47c16 Mon Sep 17 00:00:00 2001 From: Masaori Koshiba Date: Tue, 21 Feb 2023 11:37:25 +0900 Subject: [PATCH 2/9] Fix bug in the slow path of try_lock_shared --- NOTICE | 2 +- include/tscpp/util/Bravo.h | 10 ++-- src/tscpp/util/unit_tests/test_Bravo.cc | 61 ++++++++++++++++++++----- 3 files changed, 57 insertions(+), 16 deletions(-) diff --git a/NOTICE b/NOTICE index c661045a582..50552702ca3 100644 --- a/NOTICE +++ b/NOTICE @@ -100,4 +100,4 @@ https://github.com/ariya/FastLZ ~~ include/tscpp/util/Bravo.h is C++ version of puzpuzpuz/xsync's RBMutex -Copyright (c) 2021 Andrey Pechkurov +Copyright (c) 2021 Andrey Pechkurov (MIT License) diff --git a/include/tscpp/util/Bravo.h b/include/tscpp/util/Bravo.h index 0438f71a8c5..0b52694f56e 100644 --- a/include/tscpp/util/Bravo.h +++ b/include/tscpp/util/Bravo.h @@ -304,8 +304,12 @@ template = _mutex.inhibit_until) { - _mutex.read_bias.store(true, std::memory_order_release); + if (r) { + // Set RBias if the BRAVO policy allows that + if (_mutex.read_bias.load(std::memory_order_acquire) == false && _now() >= _mutex.inhibit_until) { + _mutex.read_bias.store(true, std::memory_order_release); + } + return true; } @@ -321,7 +325,7 @@ template lock(mutex); CHECK(lock.owns_lock() == true); - ++i; + CHECK(i == 0); }, std::ref(mutex)}; std::thread t2{[&](ts::bravo::shared_mutex &mutex) { ts::bravo::shared_lock lock(mutex); CHECK(lock.owns_lock() == true); - ++i; + CHECK(i == 0); }, std::ref(mutex)}; t1.join(); t2.join(); - CHECK(i == 2); + CHECK(i == 0); } SECTION("reader-writer") @@ -121,22 +158,22 @@ TEST_CASE("BRAVO - check with race", "[libts][BRAVO]") std::thread t1{[&](ts::bravo::shared_mutex &mutex) { ts::bravo::shared_lock lock(mutex); CHECK(lock.owns_lock() == true); + CHECK(i == 0); std::this_thread::sleep_for(100ms); - ++i; }, std::ref(mutex)}; std::thread t2{[&](ts::bravo::shared_mutex &mutex) { std::this_thread::sleep_for(50ms); std::lock_guard lock(mutex); - ++i; + CHECK(++i == 1); }, std::ref(mutex)}; t1.join(); t2.join(); - CHECK(i == 2); + CHECK(i == 1); } SECTION("writer-reader") @@ -147,7 +184,7 @@ TEST_CASE("BRAVO - check with race", "[libts][BRAVO]") std::thread t1{[&](ts::bravo::shared_mutex &mutex) { std::lock_guard lock(mutex); std::this_thread::sleep_for(100ms); - ++i; + CHECK(++i == 1); }, std::ref(mutex)}; @@ -155,14 +192,14 @@ TEST_CASE("BRAVO - check with race", "[libts][BRAVO]") std::this_thread::sleep_for(50ms); ts::bravo::shared_lock lock(mutex); CHECK(lock.owns_lock() == true); - ++i; + CHECK(i == 1); }, std::ref(mutex)}; t1.join(); t2.join(); - CHECK(i == 2); + CHECK(i == 1); } SECTION("writer-writer") @@ -173,14 +210,14 @@ TEST_CASE("BRAVO - check with race", "[libts][BRAVO]") std::thread t1{[&](ts::bravo::shared_mutex &mutex) { std::lock_guard lock(mutex); std::this_thread::sleep_for(100ms); - ++i; + CHECK(++i == 1); }, std::ref(mutex)}; std::thread t2{[&](ts::bravo::shared_mutex &mutex) { std::this_thread::sleep_for(50ms); std::lock_guard lock(mutex); - ++i; + CHECK(++i == 2); }, std::ref(mutex)}; From 94b154d15c5d901f98b413009f1af9cb56ef515e Mon Sep 17 00:00:00 2001 From: Masaori Koshiba Date: Tue, 21 Feb 2023 15:59:54 +0900 Subject: [PATCH 3/9] Add DenseThreadId Signed-off-by: Walt Karas --- include/tscpp/util/DenseThreadId.h | 116 +++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 include/tscpp/util/DenseThreadId.h diff --git a/include/tscpp/util/DenseThreadId.h b/include/tscpp/util/DenseThreadId.h new file mode 100644 index 00000000000..9189cf7428e --- /dev/null +++ b/include/tscpp/util/DenseThreadId.h @@ -0,0 +1,116 @@ +/** @file + + A replacement for std::shared_mutex with guarantees against writer starvation. + Cache contention between CPU cores is avoided except when a write lock is taken. + Assumes no thread will exit while holding mutex. + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ + +#pragma once + +#if __has_include() +// Included in core. +#include +#define L_Assert ink_assert +#include +#define L_Fatal Fatal +#else +// Should be plugin code. +#include +#define L_Assert TSAssert +#define L_Fatal TSFatal +#endif + +#include +#include +#include + +// Provide an alternate thread id, suitible for use as an array index. +// +class DenseThreadId +{ +public: + // This can onlhy be called during single-threaded initialization. + // + static void + set_num_possible_values(std::size_t num_possible_values) + { + _num_possible_values = num_possible_values; + } + + static std::size_t + self() + { + return _id.val; + } + + static std::size_t + num_possible_values() + { + return _num_possible_values; + } + +private: + inline static std::mutex _mtx; + inline static std::vector _id_stack; + inline static std::size_t _stack_top_idx; + inline static std::size_t _num_possible_values{256}; + + static void + _init() + { + _id_stack.resize(_num_possible_values); + + _stack_top_idx = 0; + for (std::size_t i{0}; i < _num_possible_values; ++i) { + _id_stack[i] = i + 1; + } + } + + struct _Id { + _Id() + { + std::unique_lock ul{_mtx}; + + if (!_inited) { + _init(); + _inited = true; + } + if (_id_stack.size() == _stack_top_idx) { + L_Fatal("DenseThreadId: number of threads exceeded maximum (%u)", unsigned(_id_stack.size())); + } + val = _stack_top_idx; + _stack_top_idx = _id_stack[_stack_top_idx]; + } + + ~_Id() + { + std::unique_lock ul{_mtx}; + + _id_stack[val] = _stack_top_idx; + _stack_top_idx = val; + } + + std::size_t val; + }; + + inline static thread_local _Id _id; + inline static bool _inited{false}; +}; From 867abb7acc2f67ee8dbfdde97773857167c14e56 Mon Sep 17 00:00:00 2001 From: Masaori Koshiba Date: Wed, 22 Feb 2023 15:26:21 +0900 Subject: [PATCH 4/9] Use DenseThreadId --- include/tscpp/util/Bravo.h | 29 +++++++++-------------------- src/tscpp/util/Makefile.am | 6 +++++- 2 files changed, 14 insertions(+), 21 deletions(-) diff --git a/include/tscpp/util/Bravo.h b/include/tscpp/util/Bravo.h index 0b52694f56e..2c0a91ef713 100644 --- a/include/tscpp/util/Bravo.h +++ b/include/tscpp/util/Bravo.h @@ -37,6 +37,10 @@ #pragma once +#include "DenseThreadId.h" + +#include "tscore/Diags.h" + #include #include #include @@ -46,14 +50,6 @@ namespace ts::bravo { -static inline uint32_t -mix32(uint64_t z) -{ - z = (z ^ (z >> 33)) * 0xff51afd7ed558ccdL; - z = (z ^ (z >> 33)) * 0xc4ceb9fe1a85ec53L; - return static_cast(z >> 32); -} - using time_point = std::chrono::time_point; #ifdef __cpp_lib_hardware_interference_size @@ -202,9 +198,9 @@ template class shared_lock You can use std::lock_guard for writers but, you can't use std::shared_lock for readers to handle ts::bravo::Token. Use ts::bravo::shared_lock for readers. - SLOT_SIZE needs to be larget than number of threads for lock_shared to go fast-path. + Set the SLOT_SIZE larger than DenseThreadId::num_possible_values to go fast-path. */ -template class shared_mutex_impl +template class shared_mutex_impl { public: shared_mutex_impl() = default; @@ -256,7 +252,7 @@ template hasher; - return mix32(hasher(std::this_thread::get_id())) % SLOT_SIZE; - } - /** Disable read bias and do revocation */ diff --git a/src/tscpp/util/Makefile.am b/src/tscpp/util/Makefile.am index 2fd87a821d3..d875ffea5f3 100644 --- a/src/tscpp/util/Makefile.am +++ b/src/tscpp/util/Makefile.am @@ -37,7 +37,10 @@ test_tscpputil_CPPFLAGS = $(AM_CPPFLAGS)\ -I$(abs_top_srcdir)/tests/include @SWOC_INCLUDES@ test_tscpputil_CXXFLAGS = -Wno-array-bounds $(AM_CXXFLAGS) -test_tscpputil_LDADD = libtscpputil.la @SWOC_LIBS@ +test_tscpputil_LDADD = \ + $(top_builddir)/src/tscore/libtscore.la + libtscpputil.la \ + @SWOC_LIBS@ test_tscpputil_SOURCES = \ unit_tests/unit_test_main.cc \ unit_tests/test_Bravo.cc \ @@ -49,6 +52,7 @@ test_tscpputil_SOURCES = \ benchmark_shared_mutex_CPPFLAGS = $(AM_CPPFLAGS) -I$(abs_top_srcdir)/tests/include benchmark_shared_mutex_CXXFLAGS = -Wno-array-bounds $(AM_CXXFLAGS) +benchmark_shared_mutex_LDADD = $(top_builddir)/src/tscore/libtscore.la benchmark_shared_mutex_SOURCES = unit_tests/benchmark_shared_mutex.cc clean-local: From af9914f173320500ad60aef09db5d8f312532794 Mon Sep 17 00:00:00 2001 From: Masaori Koshiba Date: Tue, 28 Feb 2023 13:42:09 +0900 Subject: [PATCH 5/9] Fix unit test dependencies --- .gitignore | 2 +- src/tscore/Makefile.am | 7 ++++++- .../util => tscore}/unit_tests/benchmark_shared_mutex.cc | 0 src/{tscpp/util => tscore}/unit_tests/test_Bravo.cc | 0 src/tscpp/util/Makefile.am | 8 -------- 5 files changed, 7 insertions(+), 10 deletions(-) rename src/{tscpp/util => tscore}/unit_tests/benchmark_shared_mutex.cc (100%) rename src/{tscpp/util => tscore}/unit_tests/test_Bravo.cc (100%) diff --git a/.gitignore b/.gitignore index 4e7545d62d0..2a8b0aa6138 100644 --- a/.gitignore +++ b/.gitignore @@ -86,6 +86,7 @@ src/tscore/ink_autoconf.h src/tscore/ink_autoconf.h.in include/tscore/ink_config.h include/ts/apidefs.h +src/tscore/benchmark_shared_mutex src/tscore/CompileParseRules src/tscore/CompileParseRules.dSYM src/tscore/ParseRulesCType @@ -99,7 +100,6 @@ src/tscore/test_geometry src/tscore/test_Regex src/tscore/test_X509HostnameValidator src/tscore/test_tscore -src/tscpp/util/benchmark_shared_mutex src/tscpp/util/test_tscpputil src/records/test_librecords src/records/test_librecords_on_eventsystem diff --git a/src/tscore/Makefile.am b/src/tscore/Makefile.am index 96174d0d56e..018845138a8 100644 --- a/src/tscore/Makefile.am +++ b/src/tscore/Makefile.am @@ -18,7 +18,7 @@ include $(top_srcdir)/build/tidy.mk -noinst_PROGRAMS = CompileParseRules freelist_benchmark +noinst_PROGRAMS = CompileParseRules freelist_benchmark benchmark_shared_mutex check_PROGRAMS = test_geometry test_X509HostnameValidator test_tscore if EXPENSIVE_TESTS @@ -176,6 +176,7 @@ test_tscore_SOURCES = \ unit_tests/test_ArgParser.cc \ unit_tests/test_BufferWriter.cc \ unit_tests/test_BufferWriterFormat.cc \ + unit_tests/test_Bravo.cc \ unit_tests/test_CryptoHash.cc \ unit_tests/test_Extendible.cc \ unit_tests/test_Histogram.cc \ @@ -212,6 +213,10 @@ freelist_benchmark_CXXFLAGS = -Wno-array-bounds $(AM_CXXFLAGS) -I$(abs_top_srcdi freelist_benchmark_LDADD = libtscore.la @HWLOC_LIBS@ freelist_benchmark_SOURCES = unit_tests/freelist_benchmark.cc +benchmark_shared_mutex_CXXFLAGS = -Wno-array-bounds $(AM_CXXFLAGS) -I$(abs_top_srcdir)/tests/include +benchmark_shared_mutex_LDADD = libtscore.la +benchmark_shared_mutex_SOURCES = unit_tests/benchmark_shared_mutex.cc + CompileParseRules_SOURCES = CompileParseRules.cc CompileParseRules$(BUILD_EXEEXT): $(CompileParseRules_OBJECTS) diff --git a/src/tscpp/util/unit_tests/benchmark_shared_mutex.cc b/src/tscore/unit_tests/benchmark_shared_mutex.cc similarity index 100% rename from src/tscpp/util/unit_tests/benchmark_shared_mutex.cc rename to src/tscore/unit_tests/benchmark_shared_mutex.cc diff --git a/src/tscpp/util/unit_tests/test_Bravo.cc b/src/tscore/unit_tests/test_Bravo.cc similarity index 100% rename from src/tscpp/util/unit_tests/test_Bravo.cc rename to src/tscore/unit_tests/test_Bravo.cc diff --git a/src/tscpp/util/Makefile.am b/src/tscpp/util/Makefile.am index d875ffea5f3..794d44d633a 100644 --- a/src/tscpp/util/Makefile.am +++ b/src/tscpp/util/Makefile.am @@ -19,7 +19,6 @@ include $(top_srcdir)/build/tidy.mk check_PROGRAMS = test_tscpputil -noinst_PROGRAMS = benchmark_shared_mutex TESTS = $(check_PROGRAMS) @@ -38,23 +37,16 @@ test_tscpputil_CPPFLAGS = $(AM_CPPFLAGS)\ test_tscpputil_CXXFLAGS = -Wno-array-bounds $(AM_CXXFLAGS) test_tscpputil_LDADD = \ - $(top_builddir)/src/tscore/libtscore.la libtscpputil.la \ @SWOC_LIBS@ test_tscpputil_SOURCES = \ unit_tests/unit_test_main.cc \ - unit_tests/test_Bravo.cc \ unit_tests/test_LocalBuffer.cc \ unit_tests/test_PostScript.cc \ unit_tests/test_Strerror.cc \ unit_tests/test_TextView.cc \ unit_tests/test_ts_meta.cc -benchmark_shared_mutex_CPPFLAGS = $(AM_CPPFLAGS) -I$(abs_top_srcdir)/tests/include -benchmark_shared_mutex_CXXFLAGS = -Wno-array-bounds $(AM_CXXFLAGS) -benchmark_shared_mutex_LDADD = $(top_builddir)/src/tscore/libtscore.la -benchmark_shared_mutex_SOURCES = unit_tests/benchmark_shared_mutex.cc - clean-local: rm -f ParseRulesCType ParseRulesCTypeToLower ParseRulesCTypeToUpper From 636d0d6eccc1c6a7268905139ac7811b57ae5f59 Mon Sep 17 00:00:00 2001 From: Masaori Koshiba Date: Tue, 28 Feb 2023 15:02:06 +0900 Subject: [PATCH 6/9] Prevent optimizer from removing target function of benchmark --- src/tscore/unit_tests/benchmark_shared_mutex.cc | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/tscore/unit_tests/benchmark_shared_mutex.cc b/src/tscore/unit_tests/benchmark_shared_mutex.cc index a8bb22f4876..3a69bc0d0cc 100644 --- a/src/tscore/unit_tests/benchmark_shared_mutex.cc +++ b/src/tscore/unit_tests/benchmark_shared_mutex.cc @@ -49,7 +49,7 @@ Conf conf; thread_local int counter = 0; template -void +int run(T &mutex) { std::thread list[conf.nthreads]; @@ -78,6 +78,8 @@ run(T &mutex) for (int i = 0; i < conf.nthreads; i++) { list[i].join(); } + + return counter; } } // namespace @@ -90,7 +92,7 @@ TEST_CASE("Micro benchmark of shared_mutex", "") { std::shared_mutex mutex; - run>(mutex); + return run>(mutex); }; } @@ -99,7 +101,8 @@ TEST_CASE("Micro benchmark of shared_mutex", "") BENCHMARK("ts::bravo::shared_mutex") { ts::bravo::shared_mutex mutex; - run>(mutex); + + return run>(mutex); }; } } From 44c4047d98acfae1ad58a03f293c75dc4c57dd2b Mon Sep 17 00:00:00 2001 From: Masaori Koshiba Date: Tue, 28 Feb 2023 15:45:12 +0900 Subject: [PATCH 7/9] Make benchmark scenario better --- src/tscore/unit_tests/benchmark_shared_mutex.cc | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/tscore/unit_tests/benchmark_shared_mutex.cc b/src/tscore/unit_tests/benchmark_shared_mutex.cc index 3a69bc0d0cc..fe1a499d777 100644 --- a/src/tscore/unit_tests/benchmark_shared_mutex.cc +++ b/src/tscore/unit_tests/benchmark_shared_mutex.cc @@ -46,29 +46,30 @@ struct Conf { }; Conf conf; -thread_local int counter = 0; - template int run(T &mutex) { std::thread list[conf.nthreads]; + int counter = 0; for (int i = 0; i < conf.nthreads; i++) { - new (&list[i]) std::thread{[](T &mutex) { + new (&list[i]) std::thread{[&counter](T &mutex) { + int c = 0; for (int j = 0; j < conf.nloop; ++j) { // reader for (int i = 0; i < conf.nread; ++i) { S lock(mutex); // Do not optimize - ++counter; + c = counter; } // writer for (int i = 0; i < conf.nwrite; ++i) { std::lock_guard lock(mutex); // Do not optimize - ++counter; + ++c; + counter = c; } } }, From fce0691ef1efc163294af4a44037dbea843b4421 Mon Sep 17 00:00:00 2001 From: Masaori Koshiba Date: Thu, 30 Mar 2023 10:39:19 +0900 Subject: [PATCH 8/9] Add constructors --- include/tscpp/util/Bravo.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/tscpp/util/Bravo.h b/include/tscpp/util/Bravo.h index 2c0a91ef713..bc1b7356e76 100644 --- a/include/tscpp/util/Bravo.h +++ b/include/tscpp/util/Bravo.h @@ -75,8 +75,10 @@ template class shared_lock public: using mutex_type = Mutex; - shared_lock() = default; + shared_lock() noexcept = default; shared_lock(Mutex &m) : _mutex(&m) { lock(); } + shared_lock(Mutex &m, std::try_to_lock_t) : _mutex(&m) { try_lock(); } + shared_lock(Mutex &m, std::defer_lock_t) noexcept : _mutex(&m) {} ~shared_lock() { From b494695ef42d313b369a050fe094ad7fbd08a753 Mon Sep 17 00:00:00 2001 From: Masaori Koshiba Date: Thu, 30 Mar 2023 11:06:41 +0900 Subject: [PATCH 9/9] Get rid of unnesessary for loop --- include/tscpp/util/Bravo.h | 52 +++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/include/tscpp/util/Bravo.h b/include/tscpp/util/Bravo.h index bc1b7356e76..a9155d34924 100644 --- a/include/tscpp/util/Bravo.h +++ b/include/tscpp/util/Bravo.h @@ -40,6 +40,7 @@ #include "DenseThreadId.h" #include "tscore/Diags.h" +#include "tscore/ink_assert.h" #include #include @@ -252,21 +253,20 @@ template = DenseThreadId::num_possible_values()); + // Fast path if (_mutex.read_bias.load(std::memory_order_acquire)) { - size_t index = DenseThreadId::self(); - for (size_t i = 0; i < SLOT_SIZE; ++i) { - index = (index + i) % SLOT_SIZE; - Slot &slot = _mutex.readers[index]; - bool expect = false; - if (slot.mu.compare_exchange_strong(expect, true, std::memory_order_relaxed)) { - // recheck - if (_mutex.read_bias.load(std::memory_order_acquire)) { - token = index + 1; - return; - } else { - slot.mu.store(false, std::memory_order_relaxed); - } + size_t index = DenseThreadId::self() % SLOT_SIZE; + Slot &slot = _mutex.readers[index]; + bool expect = false; + if (slot.mu.compare_exchange_strong(expect, true, std::memory_order_relaxed)) { + // recheck + if (_mutex.read_bias.load(std::memory_order_acquire)) { + token = index + 1; + return; + } else { + slot.mu.store(false, std::memory_order_relaxed); } } } @@ -281,21 +281,21 @@ template = DenseThreadId::num_possible_values()); + // Fast path if (_mutex.read_bias.load(std::memory_order_acquire)) { - size_t index = DenseThreadId::self(); - for (size_t i = 0; i < SLOT_SIZE; ++i) { - index = (index + i) % SLOT_SIZE; - Slot &slot = _mutex.readers[index]; - bool expect = false; - if (slot.mu.compare_exchange_weak(expect, true, std::memory_order_release, std::memory_order_relaxed)) { - // recheck - if (_mutex.read_bias.load(std::memory_order_acquire)) { - token = index + 1; - return true; - } else { - slot.mu.store(false, std::memory_order_relaxed); - } + size_t index = DenseThreadId::self() % SLOT_SIZE; + Slot &slot = _mutex.readers[index]; + bool expect = false; + + if (slot.mu.compare_exchange_weak(expect, true, std::memory_order_release, std::memory_order_relaxed)) { + // recheck + if (_mutex.read_bias.load(std::memory_order_acquire)) { + token = index + 1; + return true; + } else { + slot.mu.store(false, std::memory_order_relaxed); } } }