diff --git a/packages/react-native-executorch/android/CMakeLists.txt b/packages/react-native-executorch/android/CMakeLists.txt index 2a63642bbf..ddc7ab4126 100644 --- a/packages/react-native-executorch/android/CMakeLists.txt +++ b/packages/react-native-executorch/android/CMakeLists.txt @@ -21,6 +21,7 @@ string(APPEND CMAKE_CXX_FLAGS " -DRCT_NEW_ARCH_ENABLED") set(ANDROID_CPP_DIR "${CMAKE_SOURCE_DIR}/src/main/cpp") set(COMMON_CPP_DIR "${CMAKE_SOURCE_DIR}/../common") set(LIBS_DIR "${CMAKE_SOURCE_DIR}/../third-party/android/libs") +set(TOKENIZERS_DIR "${CMAKE_SOURCE_DIR}/../third-party/include/executorch/extension/llm/tokenizers/include") set(INCLUDE_DIR "${CMAKE_SOURCE_DIR}/../third-party/include") # Treat third-party headers as system headers to suppress deprecation warnings diff --git a/packages/react-native-executorch/android/src/main/cpp/CMakeLists.txt b/packages/react-native-executorch/android/src/main/cpp/CMakeLists.txt index 9452914c59..d7bd1fa870 100644 --- a/packages/react-native-executorch/android/src/main/cpp/CMakeLists.txt +++ b/packages/react-native-executorch/android/src/main/cpp/CMakeLists.txt @@ -17,6 +17,7 @@ target_include_directories( "${COMMON_CPP_DIR}" "${ANDROID_CPP_DIR}" "${INCLUDE_DIR}" + "${TOKENIZERS_DIR}" "${REACT_NATIVE_DIR}/ReactCommon" "${REACT_NATIVE_DIR}/ReactAndroid/src/main/jni/react/turbomodule" "${REACT_NATIVE_DIR}/ReactCommon/callinvoker" @@ -84,13 +85,6 @@ elseif(ANDROID_ABI STREQUAL "x86_64") set(OPENCV_THIRD_PARTY_LIBS "") endif() -# ------- tokenizers-cpp ------- - -set(TOKENIZERS_LIBS - "${LIBS_DIR}/tokenizers-cpp/${ANDROID_ABI}/libtokenizers_c.a" - "${LIBS_DIR}/tokenizers-cpp/${ANDROID_ABI}/libtokenizers_cpp.a" - "${LIBS_DIR}/tokenizers-cpp/${ANDROID_ABI}/libsentencepiece.a" -) # ------- phonemis ------- @@ -108,8 +102,6 @@ target_link_libraries( ${RN_VERSION_LINK_LIBRARIES} ${OPENCV_LIBS} ${OPENCV_THIRD_PARTY_LIBS} - ${TOKENIZERS_LIBS} - ${TOKENIZERS_THIRD_PARTY_LIBS} ${PHONEMIS_LIBS} executorch ${EXECUTORCH_LIBS} diff 
--git a/packages/react-native-executorch/common/rnexecutorch/ErrorCodes.h b/packages/react-native-executorch/common/rnexecutorch/ErrorCodes.h index 2a95ce8475..67748d716e 100644 --- a/packages/react-native-executorch/common/rnexecutorch/ErrorCodes.h +++ b/packages/react-native-executorch/common/rnexecutorch/ErrorCodes.h @@ -67,7 +67,7 @@ enum class RnExecutorchErrorCode : int32_t { WrongDimensions = 116, /** * Thrown when the input passed to our APIs is invalid, for example when - * passing an empty message aray to LLM's generate(). + * passing an empty message array to LLM's generate(). */ InvalidUserInput = 117, /** @@ -75,6 +75,10 @@ enum class RnExecutorchErrorCode : int32_t { * interruptions. */ DownloadInterrupted = 118, + /** + * Thrown when an error occurs with the tokenizer or tokenization process. + */ + TokenizerError = 167, /** * Thrown when there's a configuration mismatch between multilingual and * language settings in Speech-to-Text models. diff --git a/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.cpp b/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.cpp index 9c08e7b918..2d50f81b99 100644 --- a/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.cpp @@ -1,18 +1,28 @@ #include "TokenizerModule.h" #include "Error.h" #include "ErrorCodes.h" +#include #include #include -#include +#include +#include namespace rnexecutorch { using namespace facebook; +using namespace executorch::extension::constants; TokenizerModule::TokenizerModule( std::string source, std::shared_ptr callInvoker) - : tokenizer(tokenizers::Tokenizer::FromBlobJSON( - file_utils::loadBytesFromFile(source))), - memorySizeLowerBound(std::filesystem::file_size(source)) {} + : tokenizer(std::make_unique()), + memorySizeLowerBound(std::filesystem::file_size(source)) { + + auto status = tokenizer->load(source); + + if (status != tokenizers::Error::Ok) { + 
throw RnExecutorchError(RnExecutorchErrorCode::TokenizerError, + "Unexpected issue occured while loading tokenizer"); + }; +} void TokenizerModule::ensureTokenizerLoaded( const std::string &methodName) const { @@ -23,31 +33,69 @@ void TokenizerModule::ensureTokenizerLoaded( } } -std::vector TokenizerModule::encode(std::string s) const { +std::vector TokenizerModule::encode(std::string s) const { ensureTokenizerLoaded("encode"); - return tokenizer->Encode(s); + + // If the used tokenizer.json has defined post_processor field, + // setting any of bos or eos arguments to value other than provided constant + // ( which is 0) will result in running the post_processor with + // 'add_special_token' flag + auto encodeResult = + tokenizer->encode(s, numOfAddedBoSTokens, numOfAddedEoSTokens); + if (!encodeResult.ok()) { + throw rnexecutorch::RnExecutorchError( + rnexecutorch::RnExecutorchErrorCode::TokenizerError, + "Unexpected issue occured while encoding: " + + std::to_string(static_cast(encodeResult.error()))); + } + return encodeResult.get(); } -std::string TokenizerModule::decode(std::vector vec, +std::string TokenizerModule::decode(std::vector vec, bool skipSpecialTokens) const { ensureTokenizerLoaded("decode"); - return tokenizer->Decode(vec, skipSpecialTokens); + + auto decodeResult = tokenizer->decode(vec, skipSpecialTokens); + if (!decodeResult.ok()) { + throw RnExecutorchError( + RnExecutorchErrorCode::TokenizerError, + "Unexpected issue occured while decoding: " + + std::to_string(static_cast(decodeResult.error()))); + } + + return decodeResult.get(); } size_t TokenizerModule::getVocabSize() const { ensureTokenizerLoaded("getVocabSize"); - return tokenizer->GetVocabSize(); + return static_cast(tokenizer->vocab_size()); } -std::string TokenizerModule::idToToken(int32_t tokenId) const { +std::string TokenizerModule::idToToken(uint64_t tokenId) const { ensureTokenizerLoaded("idToToken"); - return tokenizer->IdToToken(tokenId); + auto result = 
tokenizer->id_to_piece(tokenId); + if (!result.ok()) { + throw rnexecutorch::RnExecutorchError( + rnexecutorch::RnExecutorchErrorCode::TokenizerError, + "Unexpected issue occured while trying to convert id to token: " + + std::to_string(static_cast(result.error()))); + } + return result.get(); } -int32_t TokenizerModule::tokenToId(std::string token) const { +uint64_t TokenizerModule::tokenToId(std::string token) const { ensureTokenizerLoaded("tokenToId"); - return tokenizer->TokenToId(token); + + auto result = tokenizer->piece_to_id(token); + if (!result.ok()) { + throw rnexecutorch::RnExecutorchError( + rnexecutorch::RnExecutorchErrorCode::TokenizerError, + "Unexpected issue occured while trying to convert token to id: " + + std::to_string(static_cast(result.error()))); + } + return result.get(); } + std::size_t TokenizerModule::getMemoryLowerBound() const noexcept { return memorySizeLowerBound; } diff --git a/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.h b/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.h index 5431abe71f..7089b83af5 100644 --- a/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.h +++ b/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.h @@ -2,8 +2,8 @@ #include "rnexecutorch/metaprogramming/ConstructorHelpers.h" #include +#include #include -#include namespace rnexecutorch { using namespace facebook; @@ -11,13 +11,13 @@ class TokenizerModule { public: explicit TokenizerModule(std::string source, std::shared_ptr callInvoker); - [[nodiscard("Registered non-void function")]] std::vector + [[nodiscard("Registered non-void function")]] std::vector encode(std::string s) const; [[nodiscard("Registered non-void function")]] std::string - decode(std::vector vec, bool skipSpecialTokens) const; + decode(std::vector vec, bool skipSpecialTokens) const; [[nodiscard("Registered non-void function")]] std::string - idToToken(int32_t tokenId) const; - [[nodiscard("Registered 
non-void function")]] int32_t + idToToken(uint64_t tokenId) const; + [[nodiscard("Registered non-void function")]] uint64_t tokenToId(std::string token) const; [[nodiscard("Registered non-void function")]] std::size_t getVocabSize() const; @@ -25,10 +25,10 @@ class TokenizerModule { private: void ensureTokenizerLoaded(const std::string &methodName) const; - std::unique_ptr tokenizer; + std::unique_ptr tokenizer; const std::size_t memorySizeLowerBound{0}; }; REGISTER_CONSTRUCTOR(TokenizerModule, std::string, std::shared_ptr); -} // namespace rnexecutorch \ No newline at end of file +} // namespace rnexecutorch diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h index de559074da..2baf922db3 100644 --- a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h +++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h @@ -218,6 +218,12 @@ getValue>(const jsi::Value &val, jsi::Runtime &runtime) { return getArrayAsVector(val, runtime); } +template <> +inline std::vector +getValue>(const jsi::Value &val, jsi::Runtime &runtime) { + return getArrayAsVector(val, runtime); +} + // Template specializations for std::span types template <> inline std::span getValue>(const jsi::Value &val, @@ -273,6 +279,12 @@ inline std::span getValue>(const jsi::Value &val, return getTypedArrayAsSpan(val, runtime); } +template <> +inline std::span +getValue>(const jsi::Value &val, jsi::Runtime &runtime) { + return getTypedArrayAsSpan(val, runtime); +} + // Conversion from C++ types to jsi -------------------------------------------- // Implementation functions might return any type, but in a promise we can only @@ -293,6 +305,15 @@ inline jsi::Value getJsiValue(const std::vector &vec, return {runtime, array}; } +inline jsi::Value getJsiValue(const std::vector &vec, + jsi::Runtime &runtime) { + jsi::Array 
array(runtime, vec.size()); + for (size_t i = 0; i < vec.size(); i++) { + array.setValueAtIndex(runtime, i, jsi::Value(static_cast(vec[i]))); + } + return {runtime, array}; +} + inline jsi::Value getJsiValue(const std::vector &vec, jsi::Runtime &runtime) { jsi::Array array(runtime, vec.size()); @@ -302,6 +323,16 @@ inline jsi::Value getJsiValue(const std::vector &vec, return {runtime, array}; } +inline jsi::Value getJsiValue(const std::vector &vec, + jsi::Runtime &runtime) { + jsi::Array array(runtime, vec.size()); + for (size_t i = 0; i < vec.size(); i++) { + array.setValueAtIndex(runtime, i, + jsi::String::createFromUtf8(runtime, vec[i])); + } + return {runtime, array}; +} + inline jsi::Value getJsiValue(const std::vector &vec, jsi::Runtime &runtime) { jsi::Array array(runtime, vec.size()); @@ -311,10 +342,28 @@ inline jsi::Value getJsiValue(const std::vector &vec, return {runtime, array}; } +// Conditional as on android, size_t and uint64_t reduce to the same type, +// introducing ambiguity +template && + !std::is_same_v>> +inline jsi::Value getJsiValue(T val, jsi::Runtime &runtime) { + return jsi::Value(static_cast(val)); +} + +inline jsi::Value getJsiValue(uint64_t val, jsi::Runtime &runtime) { + jsi::BigInt bigInt = jsi::BigInt::fromUint64(runtime, val); + return {runtime, bigInt}; +} + inline jsi::Value getJsiValue(int val, jsi::Runtime &runtime) { return {runtime, val}; } +inline jsi::Value getJsiValue(bool val, jsi::Runtime &runtime) { + return jsi::Value(val); +} + inline jsi::Value getJsiValue(const std::shared_ptr &buf, jsi::Runtime &runtime) { jsi::ArrayBuffer arrayBuffer(runtime, buf); diff --git a/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/SpeechToText.cpp b/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/SpeechToText.cpp index 3c81eb8e94..6299c9c400 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/SpeechToText.cpp +++ 
b/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/SpeechToText.cpp @@ -37,7 +37,7 @@ SpeechToText::encode(std::span waveform) const { } std::shared_ptr -SpeechToText::decode(std::span tokens, +SpeechToText::decode(std::span tokens, std::span encoderOutput) const { std::vector decoderOutput = this->asr->decode(tokens, encoderOutput); return std::make_shared(decoderOutput); diff --git a/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/SpeechToText.h b/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/SpeechToText.h index d2111d3788..e206f6ca7f 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/SpeechToText.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/SpeechToText.h @@ -22,7 +22,7 @@ class SpeechToText { encode(std::span waveform) const; [[nodiscard( "Registered non-void function")]] std::shared_ptr - decode(std::span tokens, std::span encoderOutput) const; + decode(std::span tokens, std::span encoderOutput) const; [[nodiscard("Registered non-void function")]] std::vector transcribe(std::span waveform, std::string languageOption) const; diff --git a/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/asr/ASR.cpp b/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/asr/ASR.cpp index fc15bd0440..64c63e5182 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/asr/ASR.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/asr/ASR.cpp @@ -19,15 +19,15 @@ ASR::ASR(const models::BaseModel *encoder, const models::BaseModel *decoder, endOfTranscriptionToken(this->tokenizer->tokenToId("<|endoftext|>")), timestampBeginToken(this->tokenizer->tokenToId("<|0.00|>")) {} -std::vector +std::vector ASR::getInitialSequence(const DecodingOptions &options) const { - std::vector seq; + std::vector seq; 
seq.push_back(this->startOfTranscriptionToken); if (options.language.has_value()) { - int32_t langToken = + uint64_t langToken = this->tokenizer->tokenToId("<|" + options.language.value() + "|>"); - int32_t taskToken = this->tokenizer->tokenToId("<|transcribe|>"); + uint64_t taskToken = this->tokenizer->tokenToId("<|transcribe|>"); seq.push_back(langToken); seq.push_back(taskToken); } @@ -41,7 +41,7 @@ GenerationResult ASR::generate(std::span waveform, float temperature, const DecodingOptions &options) const { std::vector encoderOutput = this->encode(waveform); - std::vector sequenceIds = this->getInitialSequence(options); + std::vector sequenceIds = this->getInitialSequence(options); const size_t initialSequenceLenght = sequenceIds.size(); std::vector scores; @@ -58,14 +58,14 @@ GenerationResult ASR::generate(std::span waveform, float temperature, const std::vector &probs = logits; - int32_t nextId; + uint64_t nextId; float nextProb; // intentionally comparing float to float // temperatures are predefined, so this is safe if (temperature == 0.0f) { auto maxIt = std::ranges::max_element(probs); - nextId = static_cast(std::distance(probs.begin(), maxIt)); + nextId = static_cast(std::distance(probs.begin(), maxIt)); nextProb = *maxIt; } else { std::discrete_distribution<> dist(probs.begin(), probs.end()); @@ -82,7 +82,7 @@ GenerationResult ASR::generate(std::span waveform, float temperature, } } - return {.tokens = std::vector( + return {.tokens = std::vector( sequenceIds.cbegin() + initialSequenceLenght, sequenceIds.cend()), .scores = scores}; } @@ -96,7 +96,7 @@ std::vector ASR::generateWithFallback(std::span waveform, const DecodingOptions &options) const { std::vector temperatures = {0.0f, 0.2f, 0.4f, 0.6f, 0.8f, 1.0f}; - std::vector bestTokens; + std::vector bestTokens; for (auto t : temperatures) { auto [tokens, scores] = this->generate(waveform, t, options); @@ -119,7 +119,7 @@ ASR::generateWithFallback(std::span waveform, } std::vector 
-ASR::calculateWordLevelTimestamps(std::span generatedTokens, +ASR::calculateWordLevelTimestamps(std::span generatedTokens, const std::span waveform) const { const size_t generatedTokensSize = generatedTokens.size(); if (generatedTokensSize < 2 || @@ -129,8 +129,8 @@ ASR::calculateWordLevelTimestamps(std::span generatedTokens, return {}; } std::vector segments; - std::vector tokens; - int32_t prevTimestamp = this->timestampBeginToken; + std::vector tokens; + uint64_t prevTimestamp = this->timestampBeginToken; for (size_t i = 0; i < generatedTokensSize; i++) { if (generatedTokens[i] < this->timestampBeginToken) { @@ -138,8 +138,8 @@ ASR::calculateWordLevelTimestamps(std::span generatedTokens, } if (i > 0 && generatedTokens[i - 1] >= this->timestampBeginToken && generatedTokens[i] >= this->timestampBeginToken) { - const int32_t start = prevTimestamp; - const int32_t end = generatedTokens[i - 1]; + const uint64_t start = prevTimestamp; + const uint64_t end = generatedTokens[i - 1]; auto words = this->estimateWordLevelTimestampsLinear(tokens, start, end); if (words.size()) { segments.emplace_back(std::move(words), 0.0); @@ -149,8 +149,8 @@ ASR::calculateWordLevelTimestamps(std::span generatedTokens, } } - const int32_t start = prevTimestamp; - const int32_t end = generatedTokens[generatedTokensSize - 2]; + const uint64_t start = prevTimestamp; + const uint64_t end = generatedTokens[generatedTokensSize - 2]; auto words = this->estimateWordLevelTimestampsLinear(tokens, start, end); if (words.size()) { @@ -174,9 +174,9 @@ ASR::calculateWordLevelTimestamps(std::span generatedTokens, } std::vector -ASR::estimateWordLevelTimestampsLinear(std::span tokens, - int32_t start, int32_t end) const { - const std::vector tokensVec(tokens.begin(), tokens.end()); +ASR::estimateWordLevelTimestampsLinear(std::span tokens, + uint64_t start, uint64_t end) const { + const std::vector tokensVec(tokens.begin(), tokens.end()); const std::string segmentText = this->tokenizer->decode(tokensVec, 
true); std::istringstream iss(segmentText); std::vector wordsStr; @@ -266,7 +266,7 @@ std::vector ASR::encode(std::span waveform) const { return {dataPtr, dataPtr + outputNumel}; } -std::vector ASR::decode(std::span tokens, +std::vector ASR::decode(std::span tokens, std::span encoderOutput) const { std::vector tokenShape = {1, static_cast(tokens.size())}; auto tokensLong = std::vector(tokens.begin(), tokens.end()); diff --git a/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/asr/ASR.h b/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/asr/ASR.h index 41d1578b44..8cdbd55226 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/asr/ASR.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/asr/ASR.h @@ -17,7 +17,7 @@ class ASR { transcribe(std::span waveform, const types::DecodingOptions &options) const; std::vector encode(std::span waveform) const; - std::vector decode(std::span tokens, + std::vector decode(std::span tokens, std::span encoderOutput) const; private: @@ -25,9 +25,9 @@ class ASR { const models::BaseModel *decoder; const TokenizerModule *tokenizer; - int32_t startOfTranscriptionToken; - int32_t endOfTranscriptionToken; - int32_t timestampBeginToken; + uint64_t startOfTranscriptionToken; + uint64_t endOfTranscriptionToken; + uint64_t timestampBeginToken; // Time precision used by Whisper timestamps: each token spans 0.02 seconds constexpr static float kTimePrecision = 0.02f; @@ -44,7 +44,7 @@ class ASR { // Number of mel frames output by the encoder (derived from input spectrogram) constexpr static int32_t kNumFrames = 1500; - std::vector + std::vector getInitialSequence(const types::DecodingOptions &options) const; types::GenerationResult generate(std::span waveform, float temperature, const types::DecodingOptions &options) const; @@ -52,11 +52,11 @@ class ASR { generateWithFallback(std::span waveform, const types::DecodingOptions &options) 
const; std::vector - calculateWordLevelTimestamps(std::span tokens, + calculateWordLevelTimestamps(std::span tokens, std::span waveform) const; std::vector - estimateWordLevelTimestampsLinear(std::span tokens, - int32_t start, int32_t end) const; + estimateWordLevelTimestampsLinear(std::span tokens, + uint64_t start, uint64_t end) const; float getCompressionRatio(const std::string &text) const; }; diff --git a/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/types/GenerationResult.h b/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/types/GenerationResult.h index efd5204420..83bc80dd77 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/types/GenerationResult.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/types/GenerationResult.h @@ -5,7 +5,7 @@ namespace rnexecutorch::models::speech_to_text::types { struct GenerationResult { - std::vector tokens; + std::vector tokens; std::vector scores; }; diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt b/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt index 215a33c6f3..b0c99586aa 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt +++ b/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt @@ -21,6 +21,7 @@ set(MONOREPO_ROOT "${PACKAGE_ROOT}/../..") set(THIRD_PARTY_DIR "${MONOREPO_ROOT}/third-party") set(REACT_NATIVE_DIR "${MONOREPO_ROOT}/node_modules/react-native") set(ANDROID_THIRD_PARTY "${PACKAGE_ROOT}/third-party/android/libs/") +set(TOKENIZERS_DIR "${PACKAGE_ROOT}/third-party/include/executorch/extension/llm/tokenizers/include") # Add Gtest as a subdirectory add_subdirectory(${THIRD_PARTY_DIR}/googletest ${PROJECT_BINARY_DIR}/googletest) @@ -80,14 +81,8 @@ target_link_libraries(opencv_deps INTERFACE ) target_link_options(opencv_deps INTERFACE -fopenmp -static-openmp) -# Tokenizers 
(Interface Library) -set(TOKENIZERS_LIBS_DIR "${ANDROID_THIRD_PARTY}/tokenizers-cpp/${ANDROID_ABI}") add_library(tokenizers_deps INTERFACE) -target_link_libraries(tokenizers_deps INTERFACE - ${TOKENIZERS_LIBS_DIR}/libtokenizers_cpp.a - ${TOKENIZERS_LIBS_DIR}/libtokenizers_c.a - ${TOKENIZERS_LIBS_DIR}/libsentencepiece.a -) +target_include_directories(tokenizers_deps INTERFACE "${TOKENIZERS_DIR}") # Source Definitions set(CORE_SOURCES @@ -110,6 +105,7 @@ add_library(rntests_core STATIC ${CORE_SOURCES}) target_include_directories(rntests_core PUBLIC ${RNEXECUTORCH_DIR}/data_processing + ${TOKENIZERS_DIR} ${RNEXECUTORCH_DIR} ${COMMON_DIR} ${PACKAGE_ROOT}/third-party/include diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/run_tests.sh b/packages/react-native-executorch/common/rnexecutorch/tests/run_tests.sh index 88fdc8e0ae..6f2de5228b 100755 --- a/packages/react-native-executorch/common/rnexecutorch/tests/run_tests.sh +++ b/packages/react-native-executorch/common/rnexecutorch/tests/run_tests.sh @@ -80,7 +80,7 @@ REQUIRED_LIBS=( # Get libc++_shared.so directly from NDK MONOREPO_ROOT="$PACKAGE_ROOT/../../.." -LIBFBJNI_PATH="$SCRIPT_DIR/../integration/libs/libfbjni.so" +LIBFBJNI_PATH="$SCRIPT_DIR/integration/libs/libfbjni.so" if [ -z "$LIBFBJNI_PATH" ]; then echo "Error: libfbjni.so not found." 
diff --git a/packages/react-native-executorch/common/runner/constants.h b/packages/react-native-executorch/common/runner/constants.h index d7b3607775..93ac6a8769 100644 --- a/packages/react-native-executorch/common/runner/constants.h +++ b/packages/react-native-executorch/common/runner/constants.h @@ -7,7 +7,7 @@ */ #pragma once // constants for LLM runtime -namespace executorch::extension::llm { +namespace executorch::extension::constants { // Runtime metadata key constants inline constexpr auto kEnableDynamicShape = "enable_dynamic_shape"; @@ -25,4 +25,6 @@ inline constexpr auto kAudioEncoderMethod = "audio_encoder"; inline constexpr auto kTokenEmbeddingMethod = "token_embedding"; inline constexpr auto kTextModelMethod = "text_decoder"; -} // namespace executorch::extension::llm +inline constexpr auto numOfAddedBoSTokens = 0; +inline constexpr auto numOfAddedEoSTokens = 0; +} // namespace executorch::extension::constants diff --git a/packages/react-native-executorch/common/runner/runner.cpp b/packages/react-native-executorch/common/runner/runner.cpp index 3b24d6ec23..5d0fec78c2 100644 --- a/packages/react-native-executorch/common/runner/runner.cpp +++ b/packages/react-native-executorch/common/runner/runner.cpp @@ -11,31 +11,23 @@ // The module takes in a string as input and emits a string as output. 
#include "runner.h" +#include "constants.h" #include "util.h" +#include #include -#include -#include -#include +#include namespace example { +using namespace executorch::extension::constants; using ::executorch::extension::Module; using ::executorch::runtime::Error; using ::executorch::runtime::Result; -namespace { -static constexpr auto kEnableDynamicShape = "enable_dynamic_shape"; -static constexpr auto kEosIds = "get_eos_ids"; -static constexpr auto kMaxSeqLen = "get_max_seq_len"; -static constexpr auto kMaxContextLen = "get_max_context_len"; -static constexpr auto kVocabSize = "get_vocab_size"; -static constexpr auto kUseKVCache = "use_kv_cache"; -static constexpr auto kUseSDPAWithKVCache = "use_sdpa_with_kv_cache"; -} // namespace - Runner::Runner(Module *module, const std::string &tokenizer_path, const llm::GenerationConfig &config) : config_(config), module_(module), tokenizer_path_(tokenizer_path), + tokenizer_(std::make_unique()), metadata_({ {kEnableDynamicShape, false}, {kMaxSeqLen, 128}, @@ -45,8 +37,8 @@ Runner::Runner(Module *module, const std::string &tokenizer_path, }) {} bool Runner::is_loaded() const { - return module_->is_loaded() && tokenizer_ && text_decoder_runner_ && - text_prefiller_ && text_token_generator_; + return module_->is_loaded() && tokenizer_->is_loaded() && + text_decoder_runner_ && text_prefiller_ && text_token_generator_; } Error Runner::load() { @@ -54,16 +46,20 @@ Error Runner::load() { return Error::Ok; } - ET_CHECK_OK_OR_RETURN_ERROR(module_->load_method("forward")); + auto status = tokenizer_->load(tokenizer_path_); - // Load tokenizer. 
- auto blob = rnexecutorch::file_utils::loadBytesFromFile(tokenizer_path_); - tokenizer_ = tokenizers::Tokenizer::FromBlobJSON(blob); + if (status != tokenizers::Error::Ok) { + throw rnexecutorch::RnExecutorchError( + rnexecutorch::RnExecutorchErrorCode::TokenizerError, + "Unexpected issue occured while loading tokenizer"); + }; + + ET_CHECK_OK_OR_RETURN_ERROR(module_->load_method("forward")); ET_LOG(Info, "Reading metadata from model"); auto eos_ids = std::make_unique>(); - metadata_[kVocabSize] = tokenizer_->GetVocabSize(); + metadata_[kVocabSize] = tokenizer_->vocab_size(); // Load model metadata const auto method_names = @@ -188,7 +184,20 @@ Error Runner::generate(const std::string &prompt, int64_t context_len_left = static_cast(max_context_length) - pos_; - std::vector prompt_tokens = tokenizer_->Encode(prompt); + // If the used tokenizer.json has defined post_processor field, + // setting any of bos or eos arguments to value other than provided constant + // ( which is 0) will result in running the post_processor with + // 'add_special_token' flag + auto encodeResult = + tokenizer_->encode(prompt, numOfAddedBoSTokens, numOfAddedEoSTokens); + if (!encodeResult.ok()) { + throw rnexecutorch::RnExecutorchError( + rnexecutorch::RnExecutorchErrorCode::TokenizerError, + "Unexpected issue occured while encoding: " + + std::to_string(static_cast(encodeResult.error()))); + } + std::vector prompt_tokens = encodeResult.get(); + std::vector prompt_tokens_uint64(prompt_tokens.begin(), prompt_tokens.end()); @@ -231,10 +240,14 @@ Error Runner::generate(const std::string &prompt, stats_.prompt_eval_end_ms = llm::time_in_ms(); ET_CHECK_OK_OR_RETURN_ERROR(prefill_res.error()); uint64_t cur_token = prefill_res.get(); - - // print the first token from prefill. No prev_token so use cur_token for it. 
- const std::string cur_decoded = - tokenizer_->Decode(std::vector{static_cast(cur_token)}); + auto decodeResult = tokenizer_->decode({cur_token}); + if (!decodeResult.ok()) { + throw rnexecutorch::RnExecutorchError( + rnexecutorch::RnExecutorchErrorCode::TokenizerError, + "Unexpected issue occured while decoding: " + + std::to_string(static_cast(decodeResult.error()))); + } + const std::string cur_decoded = decodeResult.get(); RUNNER_ET_LOG(generation_config.warming, "RSS after prompt prefill: %f MiB (0 if unsupported)", llm::get_rss_bytes() / 1024.0 / 1024.0); diff --git a/packages/react-native-executorch/common/runner/runner.h b/packages/react-native-executorch/common/runner/runner.h index ec0256a87e..f6be4f3067 100644 --- a/packages/react-native-executorch/common/runner/runner.h +++ b/packages/react-native-executorch/common/runner/runner.h @@ -21,8 +21,8 @@ #include #include #include +#include #include -#include #include namespace example { @@ -74,7 +74,7 @@ class Runner : public llm::IRunner { // Subcomponents std::string tokenizer_path_; - std::unique_ptr tokenizer_; + std::unique_ptr tokenizer_; std::unordered_map metadata_; std::unique_ptr io_manager_; std::unique_ptr text_decoder_runner_; diff --git a/packages/react-native-executorch/common/runner/text_token_generator.h b/packages/react-native-executorch/common/runner/text_token_generator.h index 712945b941..7b0dd30426 100644 --- a/packages/react-native-executorch/common/runner/text_token_generator.h +++ b/packages/react-native-executorch/common/runner/text_token_generator.h @@ -13,7 +13,8 @@ #include "text_decoder_runner.h" #include "util.h" #include -#include +#include +#include namespace executorch { namespace extension { @@ -21,7 +22,7 @@ namespace llm { class TextTokenGenerator { public: - TextTokenGenerator(::tokenizers::Tokenizer *tokenizer, + TextTokenGenerator(tokenizers::HFTokenizer *tokenizer, TextDecoderRunner *text_decoder_runner, bool use_kv_cache, std::unique_ptr> &&eos_ids, Stats *stats) @@ 
-42,7 +43,7 @@ class TextTokenGenerator { * @return how many tokens are generated. */ inline ::executorch::runtime::Result generate( - std::vector tokens, int64_t start_pos, int32_t max_new_tokens, + std::vector tokens, int64_t start_pos, uint64_t max_new_tokens, float temperature, float topp, const std::function &token_callback = {}) { ET_CHECK_MSG(!tokens.empty(), @@ -57,9 +58,9 @@ class TextTokenGenerator { [[maybe_unused]] uint64_t prev_token; // cache to keep tokens if they were decoded into illegal character - std::vector token_cache; + std::vector token_cache; // add first token after prefill to cache here - token_cache.push_back(static_cast(cur_token)); + token_cache.push_back(static_cast(cur_token)); if (use_kv_cache_) { // hard code these to size 1 as kv cache is locked to static size right @@ -106,11 +107,20 @@ class TextTokenGenerator { tokens_managed, {1, static_cast(token_data.size())})); } - token_cache.push_back(static_cast(cur_token)); + token_cache.push_back(static_cast(cur_token)); // print the token as string, decode it with the Tokenizer object - const std::string cache_decoded = tokenizer_->Decode(token_cache); - + // We pass false, as we want don't want to skip special tokens e.g. + // + + auto decodeResult = tokenizer_->decode(token_cache, false); + if (!decodeResult.ok()) { + throw rnexecutorch::RnExecutorchError( + rnexecutorch::RnExecutorchErrorCode::TokenizerError, + "Unexpected issue occured while decoding: " + + std::to_string(static_cast(decodeResult.error()))); + } + std::string cache_decoded = decodeResult.get(); const auto timeIntervalElapsed = std::chrono::duration_cast( std::chrono::high_resolution_clock::now() - timestamp_) > @@ -177,7 +187,7 @@ class TextTokenGenerator { * externally, likely in the Runner. This class assumes that the provided * pointers remain valid for the duration of its use. 
*/ - ::tokenizers::Tokenizer *tokenizer_; + tokenizers::HFTokenizer *tokenizer_; TextDecoderRunner *text_decoder_runner_; std::unique_ptr> eos_ids_; bool use_kv_cache_; diff --git a/packages/react-native-executorch/react-native-executorch.podspec b/packages/react-native-executorch/react-native-executorch.podspec index f3aa9682d3..4094d8815d 100644 --- a/packages/react-native-executorch/react-native-executorch.podspec +++ b/packages/react-native-executorch/react-native-executorch.podspec @@ -13,7 +13,6 @@ Pod::Spec.new do |s| s.platforms = { :ios => min_ios_version_supported } s.source = { :git => "https://github.com/software-mansion/react-native-executorch.git", :tag => "#{s.version}" } - tokenizers_binaries_path = File.expand_path('$(PODS_TARGET_SRCROOT)/third-party/ios/libs/tokenizers-cpp', __dir__) pthreadpool_binaries_path = File.expand_path('$(PODS_TARGET_SRCROOT)/third-party/ios/libs/pthreadpool', __dir__) cpuinfo_binaries_path = File.expand_path('$(PODS_TARGET_SRCROOT)/third-party/ios/libs/cpuinfo', __dir__) @@ -24,9 +23,6 @@ Pod::Spec.new do |s| "OTHER_LDFLAGS[sdk=iphoneos*]" => [ '$(inherited)', - "\"#{tokenizers_binaries_path}/physical-arm64-release/libtokenizers_cpp.a\"", - "\"#{tokenizers_binaries_path}/physical-arm64-release/libsentencepiece.a\"", - "\"#{tokenizers_binaries_path}/physical-arm64-release/libtokenizers_c.a\"", "\"#{pthreadpool_binaries_path}/physical-arm64-release/libpthreadpool.a\"", "\"#{cpuinfo_binaries_path}/libcpuinfo.a\"", "\"#{phonemis_binaries_path}/physical-arm64-release/libphonemis.a\"", @@ -35,9 +31,6 @@ Pod::Spec.new do |s| "OTHER_LDFLAGS[sdk=iphonesimulator*]" => [ '$(inherited)', - "\"#{tokenizers_binaries_path}/simulator-arm64-debug/libtokenizers_cpp.a\"", - "\"#{tokenizers_binaries_path}/simulator-arm64-debug/libsentencepiece.a\"", - "\"#{tokenizers_binaries_path}/simulator-arm64-debug/libtokenizers_c.a\"", "\"#{pthreadpool_binaries_path}/simulator-arm64-debug/libpthreadpool.a\"", 
"\"#{cpuinfo_binaries_path}/libcpuinfo.a\"", "\"#{phonemis_binaries_path}/simulator-arm64-debug/libphonemis.a\"", @@ -50,6 +43,7 @@ Pod::Spec.new do |s| "USE_HEADERMAP" => "YES", "HEADER_SEARCH_PATHS" => '"$(PODS_TARGET_SRCROOT)/ios" '+ + '"$(PODS_TARGET_SRCROOT)/third-party/include/executorch/extension/llm/tokenizers/include" '+ '"$(PODS_TARGET_SRCROOT)/third-party/include" '+ '"$(PODS_TARGET_SRCROOT)/common" ', "CLANG_CXX_LANGUAGE_STANDARD" => "c++20", diff --git a/packages/react-native-executorch/src/controllers/LLMController.ts b/packages/react-native-executorch/src/controllers/LLMController.ts index 5c768aab4a..a05458c5a0 100644 --- a/packages/react-native-executorch/src/controllers/LLMController.ts +++ b/packages/react-native-executorch/src/controllers/LLMController.ts @@ -347,7 +347,7 @@ export class LLMController { ): string { if (!tokenizerConfig.chat_template) { throw new RnExecutorchError( - RnExecutorchErrorCode.InvalidConfig, + RnExecutorchErrorCode.TokenizerError, "Tokenizer config doesn't include chat_template" ); } diff --git a/packages/react-native-executorch/src/errors/ErrorCodes.ts b/packages/react-native-executorch/src/errors/ErrorCodes.ts index a3e6e28ee0..5febda61b3 100644 --- a/packages/react-native-executorch/src/errors/ErrorCodes.ts +++ b/packages/react-native-executorch/src/errors/ErrorCodes.ts @@ -58,6 +58,10 @@ export enum RnExecutorchErrorCode { * Thrown when the number of downloaded files is unexpected, due to download interruptions. */ DownloadInterrupted = 118, + /** + * Thrown when an error occurs with the tokenizer or tokenization process. + */ + TokenizerError = 167, /** * Thrown when there's a configuration mismatch between multilingual and language settings in Speech-to-Text models. 
*/ diff --git a/packages/react-native-executorch/third-party/android/libs/executorch/arm64-v8a/libexecutorch.so b/packages/react-native-executorch/third-party/android/libs/executorch/arm64-v8a/libexecutorch.so old mode 100755 new mode 100644 index 78a4304d1c..2a1b99c1bc Binary files a/packages/react-native-executorch/third-party/android/libs/executorch/arm64-v8a/libexecutorch.so and b/packages/react-native-executorch/third-party/android/libs/executorch/arm64-v8a/libexecutorch.so differ diff --git a/packages/react-native-executorch/third-party/android/libs/executorch/x86_64/libexecutorch.so b/packages/react-native-executorch/third-party/android/libs/executorch/x86_64/libexecutorch.so old mode 100755 new mode 100644 index aa36de6cb5..95400e10fd Binary files a/packages/react-native-executorch/third-party/android/libs/executorch/x86_64/libexecutorch.so and b/packages/react-native-executorch/third-party/android/libs/executorch/x86_64/libexecutorch.so differ diff --git a/packages/react-native-executorch/third-party/android/libs/tokenizers-cpp/arm64-v8a/libsentencepiece.a b/packages/react-native-executorch/third-party/android/libs/tokenizers-cpp/arm64-v8a/libsentencepiece.a deleted file mode 100644 index cde6ef74e5..0000000000 Binary files a/packages/react-native-executorch/third-party/android/libs/tokenizers-cpp/arm64-v8a/libsentencepiece.a and /dev/null differ diff --git a/packages/react-native-executorch/third-party/android/libs/tokenizers-cpp/arm64-v8a/libtokenizers_c.a b/packages/react-native-executorch/third-party/android/libs/tokenizers-cpp/arm64-v8a/libtokenizers_c.a deleted file mode 100644 index d73a94d890..0000000000 Binary files a/packages/react-native-executorch/third-party/android/libs/tokenizers-cpp/arm64-v8a/libtokenizers_c.a and /dev/null differ diff --git a/packages/react-native-executorch/third-party/android/libs/tokenizers-cpp/arm64-v8a/libtokenizers_cpp.a 
b/packages/react-native-executorch/third-party/android/libs/tokenizers-cpp/arm64-v8a/libtokenizers_cpp.a deleted file mode 100644 index 5a203b4459..0000000000 Binary files a/packages/react-native-executorch/third-party/android/libs/tokenizers-cpp/arm64-v8a/libtokenizers_cpp.a and /dev/null differ diff --git a/packages/react-native-executorch/third-party/android/libs/tokenizers-cpp/x86_64/libsentencepiece.a b/packages/react-native-executorch/third-party/android/libs/tokenizers-cpp/x86_64/libsentencepiece.a deleted file mode 100644 index 77841f9850..0000000000 Binary files a/packages/react-native-executorch/third-party/android/libs/tokenizers-cpp/x86_64/libsentencepiece.a and /dev/null differ diff --git a/packages/react-native-executorch/third-party/android/libs/tokenizers-cpp/x86_64/libtokenizers_c.a b/packages/react-native-executorch/third-party/android/libs/tokenizers-cpp/x86_64/libtokenizers_c.a deleted file mode 100644 index 8fc8cdfdc5..0000000000 Binary files a/packages/react-native-executorch/third-party/android/libs/tokenizers-cpp/x86_64/libtokenizers_c.a and /dev/null differ diff --git a/packages/react-native-executorch/third-party/android/libs/tokenizers-cpp/x86_64/libtokenizers_cpp.a b/packages/react-native-executorch/third-party/android/libs/tokenizers-cpp/x86_64/libtokenizers_cpp.a deleted file mode 100644 index 1ce43b7a9d..0000000000 Binary files a/packages/react-native-executorch/third-party/android/libs/tokenizers-cpp/x86_64/libtokenizers_cpp.a and /dev/null differ diff --git a/packages/react-native-executorch/third-party/include/absl/base/attributes.h b/packages/react-native-executorch/third-party/include/absl/base/attributes.h new file mode 100644 index 0000000000..db9949ae24 --- /dev/null +++ b/packages/react-native-executorch/third-party/include/absl/base/attributes.h @@ -0,0 +1,998 @@ +// Copyright 2017 The Abseil Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This header file defines macros for declaring attributes for functions, +// types, and variables. +// +// These macros are used within Abseil and allow the compiler to optimize, where +// applicable, certain function calls. +// +// Most macros here are exposing GCC or Clang features, and are stubbed out for +// other compilers. +// +// GCC attributes documentation: +// https://gcc.gnu.org/onlinedocs/gcc-4.7.0/gcc/Function-Attributes.html +// https://gcc.gnu.org/onlinedocs/gcc-4.7.0/gcc/Variable-Attributes.html +// https://gcc.gnu.org/onlinedocs/gcc-4.7.0/gcc/Type-Attributes.html +// +// Most attributes in this file are already supported by GCC 4.7. However, some +// of them are not supported in older version of Clang. Thus, we check +// `__has_attribute()` first. If the check fails, we check if we are on GCC and +// assume the attribute exists on GCC (which is verified on GCC 4.7). + +// SKIP_ABSL_INLINE_NAMESPACE_CHECK + +#ifndef ABSL_BASE_ATTRIBUTES_H_ +#define ABSL_BASE_ATTRIBUTES_H_ + +#include "absl/base/config.h" + +// ABSL_HAVE_ATTRIBUTE +// +// A function-like feature checking macro that is a wrapper around +// `__has_attribute`, which is defined by GCC 5+ and Clang and evaluates to a +// nonzero constant integer if the attribute is supported or 0 if not. +// +// It evaluates to zero if `__has_attribute` is not defined by the compiler. 
+// +// GCC: https://gcc.gnu.org/gcc-5/changes.html +// Clang: https://clang.llvm.org/docs/LanguageExtensions.html +#ifdef __has_attribute +#define ABSL_HAVE_ATTRIBUTE(x) __has_attribute(x) +#else +#define ABSL_HAVE_ATTRIBUTE(x) 0 +#endif + +// ABSL_HAVE_CPP_ATTRIBUTE +// +// A function-like feature checking macro that accepts C++11 style attributes. +// It's a wrapper around `__has_cpp_attribute`, defined by ISO C++ SD-6 +// (https://en.cppreference.com/w/cpp/experimental/feature_test). If we don't +// find `__has_cpp_attribute`, will evaluate to 0. +#if defined(__cplusplus) && defined(__has_cpp_attribute) +// NOTE: requiring __cplusplus above should not be necessary, but +// works around https://bugs.llvm.org/show_bug.cgi?id=23435. +#define ABSL_HAVE_CPP_ATTRIBUTE(x) __has_cpp_attribute(x) +#else +#define ABSL_HAVE_CPP_ATTRIBUTE(x) 0 +#endif + +// ----------------------------------------------------------------------------- +// Function Attributes +// ----------------------------------------------------------------------------- +// +// GCC: https://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html +// Clang: https://clang.llvm.org/docs/AttributeReference.html + +// ABSL_PRINTF_ATTRIBUTE +// ABSL_SCANF_ATTRIBUTE +// +// Tells the compiler to perform `printf` format string checking if the +// compiler supports it; see the 'format' attribute in +// . +// +// Note: As the GCC manual states, "[s]ince non-static C++ methods +// have an implicit 'this' argument, the arguments of such methods +// should be counted from two, not one." 
+#if ABSL_HAVE_ATTRIBUTE(format) || (defined(__GNUC__) && !defined(__clang__)) +#define ABSL_PRINTF_ATTRIBUTE(string_index, first_to_check) \ + __attribute__((__format__(__printf__, string_index, first_to_check))) +#define ABSL_SCANF_ATTRIBUTE(string_index, first_to_check) \ + __attribute__((__format__(__scanf__, string_index, first_to_check))) +#else +#define ABSL_PRINTF_ATTRIBUTE(string_index, first_to_check) +#define ABSL_SCANF_ATTRIBUTE(string_index, first_to_check) +#endif + +// ABSL_ATTRIBUTE_ALWAYS_INLINE +// ABSL_ATTRIBUTE_NOINLINE +// +// Forces functions to either inline or not inline. Introduced in gcc 3.1. +#if ABSL_HAVE_ATTRIBUTE(always_inline) || \ + (defined(__GNUC__) && !defined(__clang__)) +#define ABSL_ATTRIBUTE_ALWAYS_INLINE __attribute__((always_inline)) +#define ABSL_HAVE_ATTRIBUTE_ALWAYS_INLINE 1 +#else +#define ABSL_ATTRIBUTE_ALWAYS_INLINE +#endif + +#if ABSL_HAVE_ATTRIBUTE(noinline) || (defined(__GNUC__) && !defined(__clang__)) +#define ABSL_ATTRIBUTE_NOINLINE __attribute__((noinline)) +#define ABSL_HAVE_ATTRIBUTE_NOINLINE 1 +#else +#define ABSL_ATTRIBUTE_NOINLINE +#endif + +// ABSL_ATTRIBUTE_NO_TAIL_CALL +// +// Prevents the compiler from optimizing away stack frames for functions which +// end in a call to another function. +#if ABSL_HAVE_ATTRIBUTE(disable_tail_calls) +#define ABSL_HAVE_ATTRIBUTE_NO_TAIL_CALL 1 +#define ABSL_ATTRIBUTE_NO_TAIL_CALL __attribute__((disable_tail_calls)) +#elif defined(__GNUC__) && !defined(__clang__) && !defined(__e2k__) +#define ABSL_HAVE_ATTRIBUTE_NO_TAIL_CALL 1 +#define ABSL_ATTRIBUTE_NO_TAIL_CALL \ + __attribute__((optimize("no-optimize-sibling-calls"))) +#else +#define ABSL_ATTRIBUTE_NO_TAIL_CALL +#define ABSL_HAVE_ATTRIBUTE_NO_TAIL_CALL 0 +#endif + +// ABSL_ATTRIBUTE_WEAK +// +// Tags a function as weak for the purposes of compilation and linking. +// Weak attributes did not work properly in LLVM's Windows backend before +// 9.0.0, so disable them there. 
See https://bugs.llvm.org/show_bug.cgi?id=37598 +// for further information. Weak attributes do not work across DLL boundary. +// The MinGW compiler doesn't complain about the weak attribute until the link +// step, presumably because Windows doesn't use ELF binaries. +#if (ABSL_HAVE_ATTRIBUTE(weak) || \ + (defined(__GNUC__) && !defined(__clang__))) && \ + (!defined(_WIN32) || \ + (defined(__clang__) && __clang_major__ >= 9 && \ + !defined(ABSL_BUILD_DLL) && !defined(ABSL_CONSUME_DLL))) && \ + !defined(__MINGW32__) +#undef ABSL_ATTRIBUTE_WEAK +#define ABSL_ATTRIBUTE_WEAK __attribute__((weak)) +#define ABSL_HAVE_ATTRIBUTE_WEAK 1 +#else +#define ABSL_ATTRIBUTE_WEAK +#define ABSL_HAVE_ATTRIBUTE_WEAK 0 +#endif + +// ABSL_ATTRIBUTE_NONNULL +// +// Tells the compiler either (a) that a particular function parameter +// should be a non-null pointer, or (b) that all pointer arguments should +// be non-null. +// +// Note: As the GCC manual states, "[s]ince non-static C++ methods +// have an implicit 'this' argument, the arguments of such methods +// should be counted from two, not one." +// +// Args are indexed starting at 1. +// +// For non-static class member functions, the implicit `this` argument +// is arg 1, and the first explicit argument is arg 2. For static class member +// functions, there is no implicit `this`, and the first explicit argument is +// arg 1. +// +// Example: +// +// /* arg_a cannot be null, but arg_b can */ +// void Function(void* arg_a, void* arg_b) ABSL_ATTRIBUTE_NONNULL(1); +// +// class C { +// /* arg_a cannot be null, but arg_b can */ +// void Method(void* arg_a, void* arg_b) ABSL_ATTRIBUTE_NONNULL(2); +// +// /* arg_a cannot be null, but arg_b can */ +// static void StaticMethod(void* arg_a, void* arg_b) +// ABSL_ATTRIBUTE_NONNULL(1); +// }; +// +// If no arguments are provided, then all pointer arguments should be non-null. +// +// /* No pointer arguments may be null. 
*/ +// void Function(void* arg_a, void* arg_b, int arg_c) ABSL_ATTRIBUTE_NONNULL(); +// +// NOTE: The GCC nonnull attribute actually accepts a list of arguments, but +// ABSL_ATTRIBUTE_NONNULL does not. +#if ABSL_HAVE_ATTRIBUTE(nonnull) || (defined(__GNUC__) && !defined(__clang__)) +#define ABSL_ATTRIBUTE_NONNULL(arg_index) __attribute__((nonnull(arg_index))) +#else +#define ABSL_ATTRIBUTE_NONNULL(...) +#endif + +// ABSL_ATTRIBUTE_NORETURN +// +// Tells the compiler that a given function never returns. +// +// Deprecated: Prefer the `[[noreturn]]` attribute standardized by C++11 over +// this macro. +#if ABSL_HAVE_ATTRIBUTE(noreturn) || (defined(__GNUC__) && !defined(__clang__)) +#define ABSL_ATTRIBUTE_NORETURN __attribute__((noreturn)) +#elif defined(_MSC_VER) +#define ABSL_ATTRIBUTE_NORETURN __declspec(noreturn) +#else +#define ABSL_ATTRIBUTE_NORETURN +#endif + +// ABSL_ATTRIBUTE_NO_SANITIZE_ADDRESS +// +// Tells the AddressSanitizer (or other memory testing tools) to ignore a given +// function. Useful for cases when a function reads random locations on stack, +// calls _exit from a cloned subprocess, deliberately accesses buffer +// out of bounds or does other scary things with memory. +// NOTE: GCC supports AddressSanitizer(asan) since 4.8. +// https://gcc.gnu.org/gcc-4.8/changes.html +#if defined(ABSL_HAVE_ADDRESS_SANITIZER) && \ + ABSL_HAVE_ATTRIBUTE(no_sanitize_address) +#define ABSL_ATTRIBUTE_NO_SANITIZE_ADDRESS __attribute__((no_sanitize_address)) +#elif defined(ABSL_HAVE_ADDRESS_SANITIZER) && defined(_MSC_VER) && \ + _MSC_VER >= 1928 +// https://docs.microsoft.com/en-us/cpp/cpp/no-sanitize-address +#define ABSL_ATTRIBUTE_NO_SANITIZE_ADDRESS __declspec(no_sanitize_address) +#elif defined(ABSL_HAVE_HWADDRESS_SANITIZER) && ABSL_HAVE_ATTRIBUTE(no_sanitize) +// HWAddressSanitizer is a sanitizer similar to AddressSanitizer, which uses CPU +// features to detect similar bugs with less CPU and memory overhead. 
+// NOTE: GCC supports HWAddressSanitizer(hwasan) since 11. +// https://gcc.gnu.org/gcc-11/changes.html +#define ABSL_ATTRIBUTE_NO_SANITIZE_ADDRESS \ + __attribute__((no_sanitize("hwaddress"))) +#else +#define ABSL_ATTRIBUTE_NO_SANITIZE_ADDRESS +#endif + +// ABSL_ATTRIBUTE_NO_SANITIZE_MEMORY +// +// Tells the MemorySanitizer to relax the handling of a given function. All "Use +// of uninitialized value" warnings from such functions will be suppressed, and +// all values loaded from memory will be considered fully initialized. This +// attribute is similar to the ABSL_ATTRIBUTE_NO_SANITIZE_ADDRESS attribute +// above, but deals with initialized-ness rather than addressability issues. +// NOTE: MemorySanitizer(msan) is supported by Clang but not GCC. +#if ABSL_HAVE_ATTRIBUTE(no_sanitize_memory) +#define ABSL_ATTRIBUTE_NO_SANITIZE_MEMORY __attribute__((no_sanitize_memory)) +#else +#define ABSL_ATTRIBUTE_NO_SANITIZE_MEMORY +#endif + +// ABSL_ATTRIBUTE_NO_SANITIZE_THREAD +// +// Tells the ThreadSanitizer to not instrument a given function. +// NOTE: GCC supports ThreadSanitizer(tsan) since 4.8. +// https://gcc.gnu.org/gcc-4.8/changes.html +#if ABSL_HAVE_ATTRIBUTE(no_sanitize_thread) +#define ABSL_ATTRIBUTE_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread)) +#else +#define ABSL_ATTRIBUTE_NO_SANITIZE_THREAD +#endif + +// ABSL_ATTRIBUTE_NO_SANITIZE_UNDEFINED +// +// Tells the UndefinedSanitizer to ignore a given function. Useful for cases +// where certain behavior (eg. division by zero) is being used intentionally. +// NOTE: GCC supports UndefinedBehaviorSanitizer(ubsan) since 4.9. 
+// https://gcc.gnu.org/gcc-4.9/changes.html +#if ABSL_HAVE_ATTRIBUTE(no_sanitize_undefined) +#define ABSL_ATTRIBUTE_NO_SANITIZE_UNDEFINED \ + __attribute__((no_sanitize_undefined)) +#elif ABSL_HAVE_ATTRIBUTE(no_sanitize) +#define ABSL_ATTRIBUTE_NO_SANITIZE_UNDEFINED \ + __attribute__((no_sanitize("undefined"))) +#else +#define ABSL_ATTRIBUTE_NO_SANITIZE_UNDEFINED +#endif + +// ABSL_ATTRIBUTE_NO_SANITIZE_CFI +// +// Tells the ControlFlowIntegrity sanitizer to not instrument a given function. +// See https://clang.llvm.org/docs/ControlFlowIntegrity.html for details. +#if ABSL_HAVE_ATTRIBUTE(no_sanitize) && defined(__llvm__) +#define ABSL_ATTRIBUTE_NO_SANITIZE_CFI __attribute__((no_sanitize("cfi"))) +#else +#define ABSL_ATTRIBUTE_NO_SANITIZE_CFI +#endif + +// ABSL_ATTRIBUTE_NO_SANITIZE_SAFESTACK +// +// Tells the SafeStack to not instrument a given function. +// See https://clang.llvm.org/docs/SafeStack.html for details. +#if ABSL_HAVE_ATTRIBUTE(no_sanitize) +#define ABSL_ATTRIBUTE_NO_SANITIZE_SAFESTACK \ + __attribute__((no_sanitize("safe-stack"))) +#else +#define ABSL_ATTRIBUTE_NO_SANITIZE_SAFESTACK +#endif + +// ABSL_ATTRIBUTE_RETURNS_NONNULL +// +// Tells the compiler that a particular function never returns a null pointer. +#if ABSL_HAVE_ATTRIBUTE(returns_nonnull) +#define ABSL_ATTRIBUTE_RETURNS_NONNULL __attribute__((returns_nonnull)) +#else +#define ABSL_ATTRIBUTE_RETURNS_NONNULL +#endif + +// ABSL_HAVE_ATTRIBUTE_SECTION +// +// Indicates whether labeled sections are supported. Weak symbol support is +// a prerequisite. Labeled sections are not supported on Darwin/iOS. 
+#ifdef ABSL_HAVE_ATTRIBUTE_SECTION +#error ABSL_HAVE_ATTRIBUTE_SECTION cannot be directly set +#elif (ABSL_HAVE_ATTRIBUTE(section) || \ + (defined(__GNUC__) && !defined(__clang__))) && \ + !defined(__APPLE__) && ABSL_HAVE_ATTRIBUTE_WEAK +#define ABSL_HAVE_ATTRIBUTE_SECTION 1 + +// ABSL_ATTRIBUTE_SECTION +// +// Tells the compiler/linker to put a given function into a section and define +// `__start_ ## name` and `__stop_ ## name` symbols to bracket the section. +// This functionality is supported by GNU linker. Any function annotated with +// `ABSL_ATTRIBUTE_SECTION` must not be inlined, or it will be placed into +// whatever section its caller is placed into. +// +#ifndef ABSL_ATTRIBUTE_SECTION +#define ABSL_ATTRIBUTE_SECTION(name) \ + __attribute__((section(#name))) __attribute__((noinline)) +#endif + +// ABSL_ATTRIBUTE_SECTION_VARIABLE +// +// Tells the compiler/linker to put a given variable into a section and define +// `__start_ ## name` and `__stop_ ## name` symbols to bracket the section. +// This functionality is supported by GNU linker. +#ifndef ABSL_ATTRIBUTE_SECTION_VARIABLE +#ifdef _AIX +// __attribute__((section(#name))) on AIX is achieved by using the `.csect` +// pseudo-op which includes an additional integer as part of its syntax indicating +// alignment. If data fall under different alignments then you might get a +// compilation error indicating a `Section type conflict`. +#define ABSL_ATTRIBUTE_SECTION_VARIABLE(name) +#else +#define ABSL_ATTRIBUTE_SECTION_VARIABLE(name) __attribute__((section(#name))) +#endif +#endif + +// ABSL_DECLARE_ATTRIBUTE_SECTION_VARS +// +// A weak section declaration to be used as a global declaration +// for ABSL_ATTRIBUTE_SECTION_START|STOP(name) to compile and link +// even without functions with ABSL_ATTRIBUTE_SECTION(name). +// ABSL_DEFINE_ATTRIBUTE_SECTION should be in exactly one file; it's +// a no-op on ELF but not on Mach-O.
+// +#ifndef ABSL_DECLARE_ATTRIBUTE_SECTION_VARS +#define ABSL_DECLARE_ATTRIBUTE_SECTION_VARS(name) \ + extern char __start_##name[] ABSL_ATTRIBUTE_WEAK; \ + extern char __stop_##name[] ABSL_ATTRIBUTE_WEAK +#endif +#ifndef ABSL_DEFINE_ATTRIBUTE_SECTION_VARS +#define ABSL_INIT_ATTRIBUTE_SECTION_VARS(name) +#define ABSL_DEFINE_ATTRIBUTE_SECTION_VARS(name) +#endif + +// ABSL_ATTRIBUTE_SECTION_START +// +// Returns `void*` pointers to start/end of a section of code with +// functions having ABSL_ATTRIBUTE_SECTION(name). +// Returns 0 if no such functions exist. +// One must ABSL_DECLARE_ATTRIBUTE_SECTION_VARS(name) for this to compile and +// link. +// +#define ABSL_ATTRIBUTE_SECTION_START(name) \ + (reinterpret_cast(__start_##name)) +#define ABSL_ATTRIBUTE_SECTION_STOP(name) \ + (reinterpret_cast(__stop_##name)) + +#else // !ABSL_HAVE_ATTRIBUTE_SECTION + +#define ABSL_HAVE_ATTRIBUTE_SECTION 0 + +// provide dummy definitions +#define ABSL_ATTRIBUTE_SECTION(name) +#define ABSL_ATTRIBUTE_SECTION_VARIABLE(name) +#define ABSL_INIT_ATTRIBUTE_SECTION_VARS(name) +#define ABSL_DEFINE_ATTRIBUTE_SECTION_VARS(name) +#define ABSL_DECLARE_ATTRIBUTE_SECTION_VARS(name) +#define ABSL_ATTRIBUTE_SECTION_START(name) (reinterpret_cast(0)) +#define ABSL_ATTRIBUTE_SECTION_STOP(name) (reinterpret_cast(0)) + +#endif // ABSL_ATTRIBUTE_SECTION + +// ABSL_ATTRIBUTE_STACK_ALIGN_FOR_OLD_LIBC +// +// Support for aligning the stack on 32-bit x86. 
+#if ABSL_HAVE_ATTRIBUTE(force_align_arg_pointer) || \ + (defined(__GNUC__) && !defined(__clang__)) +#if defined(__i386__) +#define ABSL_ATTRIBUTE_STACK_ALIGN_FOR_OLD_LIBC \ + __attribute__((force_align_arg_pointer)) +#define ABSL_REQUIRE_STACK_ALIGN_TRAMPOLINE (0) +#elif defined(__x86_64__) +#define ABSL_REQUIRE_STACK_ALIGN_TRAMPOLINE (1) +#define ABSL_ATTRIBUTE_STACK_ALIGN_FOR_OLD_LIBC +#else // !__i386__ && !__x86_64 +#define ABSL_REQUIRE_STACK_ALIGN_TRAMPOLINE (0) +#define ABSL_ATTRIBUTE_STACK_ALIGN_FOR_OLD_LIBC +#endif // __i386__ +#else +#define ABSL_ATTRIBUTE_STACK_ALIGN_FOR_OLD_LIBC +#define ABSL_REQUIRE_STACK_ALIGN_TRAMPOLINE (0) +#endif + +// ABSL_MUST_USE_RESULT +// +// Tells the compiler to warn about unused results. +// +// For code or headers that are assured to only build with C++17 and up, prefer +// just using the standard `[[nodiscard]]` directly over this macro. +// +// When annotating a function, it must appear as the first part of the +// declaration or definition. The compiler will warn if the return value from +// such a function is unused: +// +// ABSL_MUST_USE_RESULT Sprocket* AllocateSprocket(); +// AllocateSprocket(); // Triggers a warning. +// +// When annotating a class, it is equivalent to annotating every function which +// returns an instance. +// +// class ABSL_MUST_USE_RESULT Sprocket {}; +// Sprocket(); // Triggers a warning. +// +// Sprocket MakeSprocket(); +// MakeSprocket(); // Triggers a warning. +// +// Note that references and pointers are not instances: +// +// Sprocket* SprocketPointer(); +// SprocketPointer(); // Does *not* trigger a warning. +// +// ABSL_MUST_USE_RESULT allows using cast-to-void to suppress the unused result +// warning. For that, warn_unused_result is used only for clang but not for gcc. +// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66425 +// +// Note: past advice was to place the macro after the argument list. 
+// +// TODO(b/176172494): Use ABSL_HAVE_CPP_ATTRIBUTE(nodiscard) when all code is +// compliant with the stricter [[nodiscard]]. +#if defined(__clang__) && ABSL_HAVE_ATTRIBUTE(warn_unused_result) +#define ABSL_MUST_USE_RESULT __attribute__((warn_unused_result)) +#else +#define ABSL_MUST_USE_RESULT +#endif + +// ABSL_ATTRIBUTE_HOT, ABSL_ATTRIBUTE_COLD +// +// Tells GCC that a function is hot or cold. GCC can use this information to +// improve static analysis, i.e. a conditional branch to a cold function +// is likely to be not-taken. +// This annotation is used for function declarations. +// +// Example: +// +// int foo() ABSL_ATTRIBUTE_HOT; +#if ABSL_HAVE_ATTRIBUTE(hot) || (defined(__GNUC__) && !defined(__clang__)) +#define ABSL_ATTRIBUTE_HOT __attribute__((hot)) +#else +#define ABSL_ATTRIBUTE_HOT +#endif + +#if ABSL_HAVE_ATTRIBUTE(cold) || (defined(__GNUC__) && !defined(__clang__)) +#define ABSL_ATTRIBUTE_COLD __attribute__((cold)) +#else +#define ABSL_ATTRIBUTE_COLD +#endif + +// ABSL_XRAY_ALWAYS_INSTRUMENT, ABSL_XRAY_NEVER_INSTRUMENT, ABSL_XRAY_LOG_ARGS +// +// We define the ABSL_XRAY_ALWAYS_INSTRUMENT and ABSL_XRAY_NEVER_INSTRUMENT +// macro used as an attribute to mark functions that must always or never be +// instrumented by XRay. Currently, this is only supported in Clang/LLVM. +// +// For reference on the LLVM XRay instrumentation, see +// http://llvm.org/docs/XRay.html. +// +// A function with the XRAY_ALWAYS_INSTRUMENT macro attribute in its declaration +// will always get the XRay instrumentation sleds. These sleds may introduce +// some binary size and runtime overhead and must be used sparingly. +// +// These attributes only take effect when the following conditions are met: +// +// * The file/target is built in at least C++11 mode, with a Clang compiler +// that supports XRay attributes. +// * The file/target is built with the -fxray-instrument flag set for the +// Clang/LLVM compiler. 
+// * The function is defined in the translation unit (the compiler honors the +// attribute in either the definition or the declaration, and must match). +// +// There are cases when, even when building with XRay instrumentation, users +// might want to control specifically which functions are instrumented for a +// particular build using special-case lists provided to the compiler. These +// special case lists are provided to Clang via the +// -fxray-always-instrument=... and -fxray-never-instrument=... flags. The +// attributes in source take precedence over these special-case lists. +// +// To disable the XRay attributes at build-time, users may define +// ABSL_NO_XRAY_ATTRIBUTES. Do NOT define ABSL_NO_XRAY_ATTRIBUTES on specific +// packages/targets, as this may lead to conflicting definitions of functions at +// link-time. +// +// XRay isn't currently supported on Android: +// https://github.com/android/ndk/issues/368 +#if ABSL_HAVE_CPP_ATTRIBUTE(clang::xray_always_instrument) && \ + !defined(ABSL_NO_XRAY_ATTRIBUTES) && !defined(__ANDROID__) +#define ABSL_XRAY_ALWAYS_INSTRUMENT [[clang::xray_always_instrument]] +#define ABSL_XRAY_NEVER_INSTRUMENT [[clang::xray_never_instrument]] +#if ABSL_HAVE_CPP_ATTRIBUTE(clang::xray_log_args) +#define ABSL_XRAY_LOG_ARGS(N) \ + [[clang::xray_always_instrument, clang::xray_log_args(N)]] +#else +#define ABSL_XRAY_LOG_ARGS(N) [[clang::xray_always_instrument]] +#endif +#else +#define ABSL_XRAY_ALWAYS_INSTRUMENT +#define ABSL_XRAY_NEVER_INSTRUMENT +#define ABSL_XRAY_LOG_ARGS(N) +#endif + +// ABSL_ATTRIBUTE_REINITIALIZES +// +// Indicates that a member function reinitializes the entire object to a known +// state, independent of the previous state of the object. +// +// The clang-tidy check bugprone-use-after-move allows member functions marked +// with this attribute to be called on objects that have been moved from; +// without the attribute, this would result in a use-after-move warning. 
+#if ABSL_HAVE_CPP_ATTRIBUTE(clang::reinitializes) +#define ABSL_ATTRIBUTE_REINITIALIZES [[clang::reinitializes]] +#else +#define ABSL_ATTRIBUTE_REINITIALIZES +#endif + +// ----------------------------------------------------------------------------- +// Variable Attributes +// ----------------------------------------------------------------------------- + +// ABSL_ATTRIBUTE_UNUSED +// +// Prevents the compiler from complaining about variables that appear unused. +// +// For code or headers that are assured to only build with C++17 and up, prefer +// just using the standard '[[maybe_unused]]' directly over this macro. +// +// Due to differences in positioning requirements between the old, compiler +// specific __attribute__ syntax and the now standard [[maybe_unused]], this +// macro does not attempt to take advantage of '[[maybe_unused]]'. +#if ABSL_HAVE_ATTRIBUTE(unused) || (defined(__GNUC__) && !defined(__clang__)) +#undef ABSL_ATTRIBUTE_UNUSED +#define ABSL_ATTRIBUTE_UNUSED __attribute__((__unused__)) +#else +#define ABSL_ATTRIBUTE_UNUSED +#endif + +// ABSL_ATTRIBUTE_INITIAL_EXEC +// +// Tells the compiler to use "initial-exec" mode for a thread-local variable. +// See http://people.redhat.com/drepper/tls.pdf for the gory details. +#if ABSL_HAVE_ATTRIBUTE(tls_model) || (defined(__GNUC__) && !defined(__clang__)) +#define ABSL_ATTRIBUTE_INITIAL_EXEC __attribute__((tls_model("initial-exec"))) +#else +#define ABSL_ATTRIBUTE_INITIAL_EXEC +#endif + +// ABSL_ATTRIBUTE_PACKED +// +// Instructs the compiler not to use natural alignment for a tagged data +// structure, but instead to reduce its alignment to 1. +// +// Therefore, DO NOT APPLY THIS ATTRIBUTE TO STRUCTS CONTAINING ATOMICS. Doing +// so can cause atomic variables to be mis-aligned and silently violate +// atomicity on x86. +// +// This attribute can either be applied to members of a structure or to a +// structure in its entirety. 
Applying this attribute (judiciously) to a +// structure in its entirety to optimize the memory footprint of very +// commonly-used structs is fine. Do not apply this attribute to a structure in +// its entirety if the purpose is to control the offsets of the members in the +// structure. Instead, apply this attribute only to structure members that need +// it. +// +// When applying ABSL_ATTRIBUTE_PACKED only to specific structure members the +// natural alignment of structure members not annotated is preserved. Aligned +// member accesses are faster than non-aligned member accesses even if the +// targeted microprocessor supports non-aligned accesses. +#if ABSL_HAVE_ATTRIBUTE(packed) || (defined(__GNUC__) && !defined(__clang__)) +#define ABSL_ATTRIBUTE_PACKED __attribute__((__packed__)) +#else +#define ABSL_ATTRIBUTE_PACKED +#endif + +// ABSL_ATTRIBUTE_FUNC_ALIGN +// +// Tells the compiler to align the function start at least to certain +// alignment boundary +#if ABSL_HAVE_ATTRIBUTE(aligned) || (defined(__GNUC__) && !defined(__clang__)) +#define ABSL_ATTRIBUTE_FUNC_ALIGN(bytes) __attribute__((aligned(bytes))) +#else +#define ABSL_ATTRIBUTE_FUNC_ALIGN(bytes) +#endif + +// ABSL_FALLTHROUGH_INTENDED +// +// Annotates implicit fall-through between switch labels, allowing a case to +// indicate intentional fallthrough and turn off warnings about any lack of a +// `break` statement. The ABSL_FALLTHROUGH_INTENDED macro should be followed by +// a semicolon and can be used in most places where `break` can, provided that +// no statements exist between it and the next switch label. +// +// Example: +// +// switch (x) { +// case 40: +// case 41: +// if (truth_is_out_there) { +// ++x; +// ABSL_FALLTHROUGH_INTENDED; // Use instead of/along with annotations +// // in comments +// } else { +// return x; +// } +// case 42: +// ... 
+// +// Notes: When supported, GCC and Clang can issue a warning on switch labels +// with unannotated fallthrough using the warning `-Wimplicit-fallthrough`. See +// clang documentation on language extensions for details: +// https://clang.llvm.org/docs/AttributeReference.html#fallthrough-clang-fallthrough +// +// When used with unsupported compilers, the ABSL_FALLTHROUGH_INTENDED macro has +// no effect on diagnostics. In any case this macro has no effect on runtime +// behavior and performance of code. + +#ifdef ABSL_FALLTHROUGH_INTENDED +#error "ABSL_FALLTHROUGH_INTENDED should not be defined." +#elif ABSL_HAVE_CPP_ATTRIBUTE(fallthrough) +#define ABSL_FALLTHROUGH_INTENDED [[fallthrough]] +#elif ABSL_HAVE_CPP_ATTRIBUTE(clang::fallthrough) +#define ABSL_FALLTHROUGH_INTENDED [[clang::fallthrough]] +#elif ABSL_HAVE_CPP_ATTRIBUTE(gnu::fallthrough) +#define ABSL_FALLTHROUGH_INTENDED [[gnu::fallthrough]] +#else +#define ABSL_FALLTHROUGH_INTENDED \ + do { \ + } while (0) +#endif + +// ABSL_DEPRECATED() +// +// Marks a deprecated class, struct, enum, function, method and variable +// declarations. The macro argument is used as a custom diagnostic message (e.g. +// suggestion of a better alternative). +// +// For code or headers that are assured to only build with C++14 and up, prefer +// just using the standard `[[deprecated("message")]]` directly over this macro. +// +// Examples: +// +// class ABSL_DEPRECATED("Use Bar instead") Foo {...}; +// +// ABSL_DEPRECATED("Use Baz() instead") void Bar() {...} +// +// template +// ABSL_DEPRECATED("Use DoThat() instead") +// void DoThis(); +// +// enum FooEnum { +// kBar ABSL_DEPRECATED("Use kBaz instead"), +// }; +// +// Every usage of a deprecated entity will trigger a warning when compiled with +// GCC/Clang's `-Wdeprecated-declarations` option. Google's production toolchain +// turns this warning off by default, instead relying on clang-tidy to report +// new uses of deprecated code. 
+#if ABSL_HAVE_ATTRIBUTE(deprecated) +#define ABSL_DEPRECATED(message) __attribute__((deprecated(message))) +#else +// Attribute unavailable: the macro expands to nothing and `message` is dropped. +#define ABSL_DEPRECATED(message) +#endif + +// When deprecating Abseil code, it is sometimes necessary to turn off the +// warning within Abseil, until the deprecated code is actually removed. The +// deprecated code can be surrounded with these directives to achieve that +// result. +// +// class ABSL_DEPRECATED("Use Bar instead") Foo; +// +// ABSL_INTERNAL_DISABLE_DEPRECATED_DECLARATION_WARNING +// Baz ComputeBazFromFoo(Foo f); +// ABSL_INTERNAL_RESTORE_DEPRECATED_DECLARATION_WARNING +#if defined(__GNUC__) || defined(__clang__) +// Clang also supports these GCC pragmas. +#define ABSL_INTERNAL_DISABLE_DEPRECATED_DECLARATION_WARNING \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#define ABSL_INTERNAL_RESTORE_DEPRECATED_DECLARATION_WARNING \ + _Pragma("GCC diagnostic pop") +#elif defined(_MSC_VER) +// MSVC reports uses of deprecated declarations as warning C4996. +#define ABSL_INTERNAL_DISABLE_DEPRECATED_DECLARATION_WARNING \ + _Pragma("warning(push)") _Pragma("warning(disable: 4996)") +#define ABSL_INTERNAL_RESTORE_DEPRECATED_DECLARATION_WARNING \ + _Pragma("warning(pop)") +#else +// Other compilers: both macros expand to nothing, so the warning state is left +// unchanged. +#define ABSL_INTERNAL_DISABLE_DEPRECATED_DECLARATION_WARNING +#define ABSL_INTERNAL_RESTORE_DEPRECATED_DECLARATION_WARNING +#endif // defined(__GNUC__) || defined(__clang__) + +// ABSL_CONST_INIT +// +// A variable declaration annotated with the `ABSL_CONST_INIT` attribute will +// not compile (on supported platforms) unless the variable has a constant +// initializer. This is useful for variables with static and thread storage +// duration, because it guarantees that they will not suffer from the so-called +// "static init order fiasco". +// +// This attribute must be placed on the initializing declaration of the +// variable.
Some compilers will give a `-Wmissing-constinit` warning when this +// attribute is placed on some other declaration but missing from the +// initializing declaration. +// +// In some cases (notably with thread_local variables), `ABSL_CONST_INIT` can +// also be used in a non-initializing declaration to tell the compiler that a +// variable is already initialized, reducing overhead that would otherwise be +// incurred by a hidden guard variable. Thus, annotating all declarations with +// this attribute is recommended to potentially enhance optimization. +// +// Example: +// +// class MyClass { +// public: +// ABSL_CONST_INIT static MyType my_var; +// }; +// +// ABSL_CONST_INIT MyType MyClass::my_var = MakeMyType(...); +// +// For code or headers that are assured to only build with C++20 and up, prefer +// just using the standard `constinit` keyword directly over this macro. +// +// Note that this attribute is redundant if the variable is declared constexpr. +// +// The standard `constinit` keyword is used when the compiler advertises it; +// otherwise Clang's attribute is used, and failing that the macro expands to +// nothing (no check is performed). +#if defined(__cpp_constinit) && __cpp_constinit >= 201907L +#define ABSL_CONST_INIT constinit +#elif ABSL_HAVE_CPP_ATTRIBUTE(clang::require_constant_initialization) +#define ABSL_CONST_INIT [[clang::require_constant_initialization]] +#else +#define ABSL_CONST_INIT +#endif + +// ABSL_ATTRIBUTE_PURE_FUNCTION +// +// ABSL_ATTRIBUTE_PURE_FUNCTION is used to annotate declarations of "pure" +// functions. A function is pure if its return value depends only on its +// arguments and on program state that it reads but does not modify. The pure +// attribute prohibits a function from modifying the state +// of the program that is observable by means other than inspecting the +// function's return value. Declaring such functions with the pure attribute +// allows the compiler to avoid emitting some calls in repeated invocations of +// the function with the same argument values.
+// +// Example: +// +// ABSL_ATTRIBUTE_PURE_FUNCTION std::string FormatTime(Time t); +#if ABSL_HAVE_CPP_ATTRIBUTE(gnu::pure) +#define ABSL_ATTRIBUTE_PURE_FUNCTION [[gnu::pure]] +#elif ABSL_HAVE_ATTRIBUTE(pure) +#define ABSL_ATTRIBUTE_PURE_FUNCTION __attribute__((pure)) +#else +// If the attribute isn't defined, we fall back to ABSL_MUST_USE_RESULT, since +// a pure function is useless if its return value is ignored. +#define ABSL_ATTRIBUTE_PURE_FUNCTION ABSL_MUST_USE_RESULT +#endif + +// ABSL_ATTRIBUTE_CONST_FUNCTION +// +// ABSL_ATTRIBUTE_CONST_FUNCTION is used to annotate declarations of "const" +// functions. A const function is similar to a pure function, with one +// exception: a pure function may return a value that depends on a non-volatile +// object that isn't provided as a function argument, while a const function +// is guaranteed to return the same result given the same arguments. +// +// Example: +// +// ABSL_ATTRIBUTE_CONST_FUNCTION int64_t ToInt64Milliseconds(Duration d); +#if defined(_MSC_VER) && !defined(__clang__) +// Put the MSVC case first since MSVC seems to parse const as a C++ keyword. +#define ABSL_ATTRIBUTE_CONST_FUNCTION ABSL_ATTRIBUTE_PURE_FUNCTION +#elif ABSL_HAVE_CPP_ATTRIBUTE(gnu::const) +#define ABSL_ATTRIBUTE_CONST_FUNCTION [[gnu::const]] +#elif ABSL_HAVE_ATTRIBUTE(const) +#define ABSL_ATTRIBUTE_CONST_FUNCTION __attribute__((const)) +#else +// Since a const function is a more restricted kind of pure function, we fall +// back to the pure attribute if the const attribute is not handled. +#define ABSL_ATTRIBUTE_CONST_FUNCTION ABSL_ATTRIBUTE_PURE_FUNCTION +#endif + +// ABSL_ATTRIBUTE_LIFETIME_BOUND indicates that a resource owned by a function +// parameter or implicit object parameter is retained by the return value of the +// annotated function (or, for a parameter of a constructor, in the value of the +// constructed object). This attribute causes warnings to be produced if a +// temporary object does not live long enough.
+// +// When applied to a reference parameter, the referenced object is assumed to be +// retained by the return value of the function. When applied to a non-reference +// parameter (for example, a pointer or a class type), all temporaries +// referenced by the parameter are assumed to be retained by the return value of +// the function. +// +// See also the upstream documentation: +// https://clang.llvm.org/docs/AttributeReference.html#lifetimebound +// https://learn.microsoft.com/en-us/cpp/code-quality/c26816?view=msvc-170 +#if ABSL_HAVE_CPP_ATTRIBUTE(clang::lifetimebound) +#define ABSL_ATTRIBUTE_LIFETIME_BOUND [[clang::lifetimebound]] +#elif ABSL_HAVE_CPP_ATTRIBUTE(msvc::lifetimebound) +#define ABSL_ATTRIBUTE_LIFETIME_BOUND [[msvc::lifetimebound]] +#elif ABSL_HAVE_ATTRIBUTE(lifetimebound) +#define ABSL_ATTRIBUTE_LIFETIME_BOUND __attribute__((lifetimebound)) +#else +// No supported spelling: the macro expands to nothing, so the annotation has +// no effect on diagnostics. +#define ABSL_ATTRIBUTE_LIFETIME_BOUND +#endif + +// ABSL_ATTRIBUTE_VIEW indicates that a type is solely a "view" of data that it +// points to, similarly to a span, string_view, or other non-owning reference +// type.
+// This enables diagnosing certain lifetime issues similar to those enabled by +// ABSL_ATTRIBUTE_LIFETIME_BOUND, such as: +// +// struct ABSL_ATTRIBUTE_VIEW StringView { +// template <typename R> +// StringView(const R&); +// }; +// +// StringView f(std::string s) { +// return s; // warning: address of stack memory returned +// } +// +// We disable this on Clang versions < 13 because of the following +// false-positive: +// +// absl::string_view f(absl::optional<absl::string_view> sv) { return *sv; } +// +// See the following links for details: +// https://reviews.llvm.org/D64448 +// https://lists.llvm.org/pipermail/cfe-dev/2018-November/060355.html +#if ABSL_HAVE_CPP_ATTRIBUTE(gsl::Pointer) && \ + (!defined(__clang_major__) || __clang_major__ >= 13) +#define ABSL_ATTRIBUTE_VIEW [[gsl::Pointer]] +#else +#define ABSL_ATTRIBUTE_VIEW +#endif + +// ABSL_ATTRIBUTE_OWNER indicates that a type is a container, smart pointer, or +// similar class that owns all the data that it points to. +// This enables diagnosing certain lifetime issues similar to those enabled by +// ABSL_ATTRIBUTE_LIFETIME_BOUND, such as: +// +// struct ABSL_ATTRIBUTE_VIEW StringView { +// template <typename R> +// StringView(const R&); +// }; +// +// struct ABSL_ATTRIBUTE_OWNER String {}; +// +// StringView f(String s) { +// return s; // warning: address of stack memory returned +// } +// +// We disable this on Clang versions < 13 because of the following +// false-positive: +// +// absl::string_view f(absl::optional<absl::string_view> sv) { return *sv; } +// +// See the following links for details: +// https://reviews.llvm.org/D64448 +// https://lists.llvm.org/pipermail/cfe-dev/2018-November/060355.html +#if ABSL_HAVE_CPP_ATTRIBUTE(gsl::Owner) && \ + (!defined(__clang_major__) || __clang_major__ >= 13) +#define ABSL_ATTRIBUTE_OWNER [[gsl::Owner]] +#else +#define ABSL_ATTRIBUTE_OWNER +#endif + +// ABSL_ATTRIBUTE_TRIVIAL_ABI +// Indicates that a type is "trivially relocatable" -- meaning it can be +// relocated without invoking the constructor/destructor, using a
form of move +// elision. +// +// From a memory safety point of view, putting aside destructor ordering, it's +// safe to apply ABSL_ATTRIBUTE_TRIVIAL_ABI if an object's location +// can change over the course of its lifetime: if a constructor can be run in +// one place, and then the object magically teleports to another place where some +// methods are run, and then the object teleports to yet another place where it +// is destroyed. This is notably not true for self-referential types, where the +// move constructor must keep the self-reference up to date. If the type changed +// location without invoking the move constructor, it would have a dangling +// self-reference. +// +// The use of this teleporting machinery means that the number of paired +// move/destroy operations can change, and so it is a bad idea to apply this to +// a type meant to count the number of moves. +// +// Warning: applying this can, rarely, break callers. Objects passed by value +// will be destroyed at the end of the call, instead of the end of the +// full-expression containing the call. In addition, it changes the ABI +// of functions accepting this type by value (e.g. to pass in registers). +// +// See also the upstream documentation: +// https://clang.llvm.org/docs/AttributeReference.html#trivial-abi +// +// b/321691395 - This is currently disabled in open-source builds since +// compiler support differs. If system libraries compiled with GCC are mixed +// with libraries compiled with Clang, types will have different ideas about +// their ABI, leading to hard-to-debug crashes. The macro is therefore +// unconditionally defined as empty here. +#define ABSL_ATTRIBUTE_TRIVIAL_ABI + +// ABSL_ATTRIBUTE_NO_UNIQUE_ADDRESS +// +// Indicates a data member can be optimized to occupy no space (if it is empty) +// and/or its tail padding can be used for other members. +// +// For code that is assured to only build with C++20 or later, prefer using +// the standard attribute `[[no_unique_address]]` directly instead of this +// macro.
+// +// https://devblogs.microsoft.com/cppblog/msvc-cpp20-and-the-std-cpp20-switch/#c20-no_unique_address +// Current versions of MSVC have disabled `[[no_unique_address]]` since it +// breaks ABI compatibility, but offer `[[msvc::no_unique_address]]` for +// situations when it can be assured that it is desired. Since Abseil does not +// claim ABI compatibility in mixed builds, we can offer it unconditionally. +// +// The MSVC branch is tested first so that MSVC uses its vendor-specific +// spelling rather than the standard attribute. +#if defined(_MSC_VER) && _MSC_VER >= 1929 +#define ABSL_ATTRIBUTE_NO_UNIQUE_ADDRESS [[msvc::no_unique_address]] +#elif ABSL_HAVE_CPP_ATTRIBUTE(no_unique_address) +#define ABSL_ATTRIBUTE_NO_UNIQUE_ADDRESS [[no_unique_address]] +#else +#define ABSL_ATTRIBUTE_NO_UNIQUE_ADDRESS +#endif + +// ABSL_ATTRIBUTE_UNINITIALIZED +// +// GCC and Clang support a flag `-ftrivial-auto-var-init=