diff --git a/packages/react-native-executorch/third-party/android/libs/executorch/arm64-v8a/libexecutorch.so b/packages/react-native-executorch/third-party/android/libs/executorch/arm64-v8a/libexecutorch.so index cfaec8b38a..8c65aa5d85 100644 Binary files a/packages/react-native-executorch/third-party/android/libs/executorch/arm64-v8a/libexecutorch.so and b/packages/react-native-executorch/third-party/android/libs/executorch/arm64-v8a/libexecutorch.so differ diff --git a/packages/react-native-executorch/third-party/android/libs/executorch/x86_64/libexecutorch.so b/packages/react-native-executorch/third-party/android/libs/executorch/x86_64/libexecutorch.so index c6b5f4d571..a56a5d20ac 100644 Binary files a/packages/react-native-executorch/third-party/android/libs/executorch/x86_64/libexecutorch.so and b/packages/react-native-executorch/third-party/android/libs/executorch/x86_64/libexecutorch.so differ diff --git a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/base64.h b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/base64.h index 3dfebc7d3e..9034d7c661 100644 --- a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/base64.h +++ b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/base64.h @@ -36,7 +36,7 @@ namespace base64 { using tokenizers::Error; using tokenizers::Result; -Result decode(const std::string_view &input); +Result decode(const std::string_view& input); namespace detail { @@ -68,9 +68,12 @@ inline Error validate(uint32_t v) { return Error::Ok; } -inline Error decode(const std::string_view &input, std::string &output) { - TK_CHECK_OR_RETURN_ERROR(input.size() == 4, Base64DecodeFailure, - "input length must be 4, got %zu", input.size()); +inline Error decode(const std::string_view& input, std::string& output) { + TK_CHECK_OR_RETURN_ERROR( + input.size() == 4, + Base64DecodeFailure, + "input length must be 4, got %zu", + input.size()); uint32_t val = 0; @@ -100,10 +103,14 @@ inline Error decode(const std::string_view &input, std::string &output) { return Error::Ok; } -inline Error decode_1_padding(const std::string_view &input, - std::string &output) { - TK_CHECK_OR_RETURN_ERROR(input.size() == 3, Base64DecodeFailure, - "input length must be 3, got %zu", input.size()); +inline Error decode_1_padding( + const std::string_view& input, + std::string& output) { + TK_CHECK_OR_RETURN_ERROR( + input.size() == 3, + Base64DecodeFailure, + "input length must be 3, got %zu", + input.size()); uint32_t val = 0; @@ -127,10 +134,14 @@ inline Error decode_1_padding(const std::string_view &input, return Error::Ok; } -inline Error decode_2_padding(const std::string_view &input, - std::string &output) { - TK_CHECK_OR_RETURN_ERROR(input.size() == 2, Base64DecodeFailure, - "input length must be 2, got %zu", input.size()); +inline Error decode_2_padding( + const std::string_view& input, + std::string& output) { + TK_CHECK_OR_RETURN_ERROR( + input.size() == 2, + Base64DecodeFailure, + "input length must be 2, got %zu", + input.size()); uint32_t val = 0; @@ -150,12 +161,13 @@ inline Error decode_2_padding(const std::string_view &input, } // namespace detail -inline tokenizers::Result decode(const std::string_view &input) { +inline tokenizers::Result decode(const std::string_view& input) { TK_CHECK_OR_RETURN_ERROR(!input.empty(), Base64DecodeFailure, "empty input"); // Faster than `input.size() % 4`. TK_CHECK_OR_RETURN_ERROR( - (input.size() & 3) == 0 && input.size() >= 4, Base64DecodeFailure, + (input.size() & 3) == 0 && input.size() >= 4, + Base64DecodeFailure, "input length must be larger than 4 and is multiple of 4, got %zu", input.size()); diff --git a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/bpe_model.h b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/bpe_model.h index 246927d5e3..4ee6d4904b 100644 --- a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/bpe_model.h +++ b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/bpe_model.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -24,47 +25,66 @@ namespace tokenizers { class BPEModel : public Model { -public: - explicit BPEModel(detail::TokenMap token_map, - detail::TokenMap special_token_map, - std::optional merge_ranks, - std::unique_ptr special_token_regex, - bool byte_fallback, std::optional unk_token_id, - std::optional bos_token_id, - std::optional eos_token_id); + public: + explicit BPEModel( + detail::TokenMap token_map, + detail::TokenMap special_token_map, + std::optional merge_ranks, + std::unique_ptr special_token_regex, + bool byte_fallback, + std::optional unk_token_id, + std::optional bos_token_id, + std::optional eos_token_id, + std::unordered_set rstrip_tokens = {}, + std::unordered_set lstrip_tokens = {}); ~BPEModel() override = default; - Result> - tokenize(const std::string &piece) const override; + Result> tokenize( + const std::string& piece) const override; Result id_to_piece(uint64_t token) const override; - Result piece_to_id(const std::string &token) const override; + Result piece_to_id(const std::string& token) const override; - int32_t vocab_size() const override { return vocab_size_; } + int32_t vocab_size() const override { + return vocab_size_; + } bool is_special_token(uint64_t token) const override; - bool is_loaded() const override { return initialized_; } + bool is_loaded() const override { + return initialized_; + } std::pair, std::string> - split_with_allowed_special_token(const std::string &input, - size_t offset) const override; + split_with_allowed_special_token(const std::string& input, size_t offset) + const override; - uint64_t bos_token_id() const override { return bos_token_id_.value_or(0); } + bool special_token_has_rstrip(const std::string& token) const override { + return rstrip_tokens_.count(token) > 0; + } + bool special_token_has_lstrip(const std::string& token) const override { + return lstrip_tokens_.count(token) > 0; + } - uint64_t eos_token_id() const override { return eos_token_id_.value_or(0); } + uint64_t bos_token_id() const override { + return bos_token_id_.value_or(0); + } -private: - Result, uint64_t>> - encode_with_special_token(const std::string &text) const; + uint64_t eos_token_id() const override { + return eos_token_id_.value_or(0); + } - Result> - byte_pair_encode(const std::string &piece) const; + private: + Result, uint64_t>> encode_with_special_token( + const std::string& text) const; - std::vector - byte_pair_merge(const std::string &piece, const detail::TokenMap &ranks, - std::function func) const; + Result> byte_pair_encode(const std::string& piece) const; + + std::vector byte_pair_merge( + const std::string& piece, + const detail::TokenMap& ranks, + std::function func) const; // Real state detail::TokenMap token_map_; @@ -76,6 +96,8 @@ class BPEModel : public Model { std::optional unk_token_id_; std::optional bos_token_id_; std::optional eos_token_id_; + std::unordered_set rstrip_tokens_; + std::unordered_set lstrip_tokens_; bool initialized_ = false; int32_t vocab_size_ = 0; diff --git a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/bpe_tokenizer_base.h b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/bpe_tokenizer_base.h index 9323f18888..f83dc1e78b 100644 --- a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/bpe_tokenizer_base.h +++ b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/bpe_tokenizer_base.h @@ -32,53 +32,62 @@ namespace tokenizers { namespace detail { class BPETokenizerBase : public Tokenizer { -public: - Result> encode(const std::string &input, int8_t bos, - int8_t eos) const override; + public: + Result> + encode(const std::string& input, int8_t bos, int8_t eos) const override; Result id_to_piece(uint64_t token) const override; - Result piece_to_id(const std::string &text) const override; + Result piece_to_id(const std::string& text) const override; - Result decode(uint64_t prev_token, uint64_t token, - bool skip_special_tokens = false) const override; + Result decode( + uint64_t prev_token, + uint64_t token, + bool skip_special_tokens = false) const override; -protected: + protected: explicit BPETokenizerBase() {} virtual ~BPETokenizerBase() override {} std::pair, std::string> - split_with_allowed_special_token_(const std::string &input, - const TokenMap &allowed_special) const; + split_with_allowed_special_token_( + const std::string& input, + const TokenMap& allowed_special) const; std::pair, std::string> - split_with_allowed_special_token_(const std::string &input, size_t offset, - const TokenMap &allowed_special) const; + split_with_allowed_special_token_( + const std::string& input, + size_t offset, + const TokenMap& allowed_special) const; - Result, uint64_t>> - encode_with_special_token_(const std::string &text, - const TokenMap &allowed_special) const; + Result, uint64_t>> encode_with_special_token_( + const std::string& text, + const TokenMap& allowed_special) const; - virtual Result> - byte_pair_encode_(const std::string &piece, const TokenMap &encoder) const; + virtual Result> byte_pair_encode_( + const std::string& piece, + const TokenMap& encoder) const; // Virtual method for BPE merging - can be overridden by derived classes // The passed in `ranks` param for the base impl is just a regular token map // and that the actual ranks are derived implicitly from the regular token // map. This is the same implementation as Tiktoken. - virtual std::vector - _byte_pair_merge(const std::string &piece, const TokenMap &ranks, - std::function func) const; + virtual std::vector _byte_pair_merge( + const std::string& piece, + const TokenMap& ranks, + std::function func) const; // Protected members that can be overloaded by other BPE tokenizers std::unique_ptr special_token_regex_; std::optional token_map_; std::optional special_token_map_; -private: - virtual Error _encode(const std::string &input, std::vector &ret, - uint64_t &last_piece_token_len) const = 0; + private: + virtual Error _encode( + const std::string& input, + std::vector& ret, + uint64_t& last_piece_token_len) const = 0; - virtual void _decode(const std::string &input, std::string &ret) const = 0; + virtual void _decode(const std::string& input, std::string& ret) const = 0; }; } // namespace detail diff --git a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/error.h b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/error.h index 52dff63c6d..f12fda2a79 100644 --- a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/error.h +++ b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/error.h @@ -73,12 +73,12 @@ enum class Error : error_code_t { * @param[in] message__ Format string for the log error message. * @param[in] ... Optional additional arguments for the format string. */ -#define TK_CHECK_OR_RETURN_ERROR(cond__, error__, message__, ...) \ - { \ - if (!(cond__)) { \ - TK_LOG(Error, message__, ##__VA_ARGS__); \ - return ::tokenizers::Error::error__; \ - } \ +#define TK_CHECK_OR_RETURN_ERROR(cond__, error__, message__, ...) \ + { \ + if (!(cond__)) { \ + TK_LOG(Error, message__, ##__VA_ARGS__); \ + return ::tokenizers::Error::error__; \ + } \ } /** @@ -86,10 +86,10 @@ enum class Error : error_code_t { * @param[in] error__ Error enum value to return without the `Error::` prefix, * like `Base64DecodeFailure`. */ -#define TK_CHECK_OK_OR_RETURN_ERROR(error__) \ - do { \ - const auto et_error__ = (error__); \ - if (et_error__ != ::tokenizers::Error::Ok) { \ - return et_error__; \ - } \ +#define TK_CHECK_OK_OR_RETURN_ERROR(error__) \ + do { \ + const auto et_error__ = (error__); \ + if (et_error__ != ::tokenizers::Error::Ok) { \ + return et_error__; \ + } \ } while (0) diff --git a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/hf_tokenizer.h b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/hf_tokenizer.h index c43a163198..130e7e603d 100644 --- a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/hf_tokenizer.h +++ b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/hf_tokenizer.h @@ -13,14 +13,16 @@ #pragma once // Standard -#include #include +#include #include // Local #include #include +#include #include +#include #include #include #include @@ -33,7 +35,7 @@ namespace tokenizers { class HFTokenizer : public Tokenizer { -public: + public: /*-- Public Interface --*/ /** @@ -45,39 +47,58 @@ class HFTokenizer : public Tokenizer { /** * Load the model data into the */ - Error load(const std::string &tokenizer_path) override; + Error load(const std::string& tokenizer_path) override; - Result> encode(const std::string &input, int8_t bos = 0, - int8_t eos = 0) const override; + Result> encode( + const std::string& input, + int8_t bos = 0, + int8_t eos = 0) const override; Result id_to_piece(uint64_t token) const override; - Result piece_to_id(const std::string &text) const override; - - Result decode(uint64_t prev_token, uint64_t token, - bool skip_special_tokens = false) const override; - - Result decode(const std::vector &tokens, - bool skip_special_tokens = false) const; - -private: - Error setup_normalizer(const nlohmann::json &parsed_json); - Error setup_pretokenizer(const nlohmann::json &parsed_json); - Error setup_postprocessor(const nlohmann::json &parsed_json); - Error setup_decoder(const nlohmann::json &parsed_json); - Error setup_truncation(const nlohmann::json &parsed_json); - Error setup_padding(const nlohmann::json &parsed_json); - Error setup_model(const nlohmann::json &parsed_json, - const std::string &model_config_path, - const std::string &special_tokens_map_path); - - Normalizer::Ptr _normalizer; - PreTokenizer::Ptr _pretokenizer; - PostProcessor::Ptr _postprocessor; - TokenDecoder::Ptr _decoder; - Truncation::Ptr _truncation; - Padding::Ptr _padding; - - Model::Ptr _model; + Result piece_to_id(const std::string& text) const override; + + Result decode( + uint64_t prev_token, + uint64_t token, + bool skip_special_tokens = false) const override; + + Result decode( + const std::vector& tokens, + bool skip_special_tokens = false) const; + + private: + Error setup_normalizer(const nlohmann::json& parsed_json); + Error setup_pretokenizer(const nlohmann::json& parsed_json); + Error setup_postprocessor(const nlohmann::json& parsed_json); + Error setup_decoder(const nlohmann::json& parsed_json); + Error setup_truncation(const nlohmann::json& parsed_json); + Error setup_padding(const nlohmann::json& parsed_json); + Error setup_model( + const nlohmann::json& parsed_json, + const std::string& model_config_path, + const std::string& special_tokens_map_path); + + /// Split input around the first added_token match. Returns (matched token, + /// text before match). Uses _added_token_regex if available, otherwise + /// falls back to the model's special-only regex. + std::pair, std::string> + split_added_token(const std::string& input, size_t offset) const; + + Normalizer::Ptr normalizer_; + PreTokenizer::Ptr pretokenizer_; + PostProcessor::Ptr postprocessor_; + TokenDecoder::Ptr decoder_; + Truncation::Ptr truncation_; + Padding::Ptr padding_; + + Model::Ptr model_; + + // Regex matching ALL added_tokens (both special and non-special). + // HF matches every added_token as a never-split unit during encoding. + // This is separate from the model's special_token_regex which only + // contains special=true tokens (for standalone model.tokenize()). + std::unique_ptr added_token_regex_; + std::unique_ptr added_token_map_; }; } // namespace tokenizers \ No newline at end of file diff --git a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/llama2c_tokenizer.h b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/llama2c_tokenizer.h index 6376ef3b4c..058740d0e2 100644 --- a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/llama2c_tokenizer.h +++ b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/llama2c_tokenizer.h @@ -7,30 +7,32 @@ */ // @lint-ignore-every CLANGTIDY facebook-hte-RelativeInclude #pragma once -#include #include +#include namespace tokenizers { // A simple Byte Pair Encoding (BPE) Tokenizer. Note that the current C++ code // won't work with this class, it needs to go through tokenizer.py first. class Llama2cTokenizer : public Tokenizer { -public: + public: explicit Llama2cTokenizer(); ~Llama2cTokenizer() override; - Error load(const std::string &tokenizer_path) override; + Error load(const std::string& tokenizer_path) override; Result id_to_piece(uint64_t token) const override; - Result piece_to_id(const std::string &text) const override; + Result piece_to_id(const std::string& text) const override; - Result> encode(const std::string &input, int8_t bos, - int8_t eos) const override; + Result> + encode(const std::string& input, int8_t bos, int8_t eos) const override; - Result decode(uint64_t prev_token, uint64_t token, - bool skip_special_tokens = false) const override; + Result decode( + uint64_t prev_token, + uint64_t token, + bool skip_special_tokens = false) const override; -private: + private: inline Error _decode_verify(uint64_t token) const { if (!initialized_) { return Error::Uninitialized; @@ -40,7 +42,7 @@ class Llama2cTokenizer : public Tokenizer { } return Error::Ok; } - std::unique_ptr vocab_ = nullptr; + std::unique_ptr vocab_ = nullptr; std::unique_ptr vocab_scores_ = nullptr; std::unique_ptr sorted_vocab_ = nullptr; unsigned int max_token_length_ = 0; diff --git a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/log.h b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/log.h index 505caa44a3..0282a2ca21 100644 --- a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/log.h +++ b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/log.h @@ -40,7 +40,7 @@ #include #define TK_PRINTFLIKE(_string_index, _va_index) _Printf_format_string_ #else -#define TK_PRINTFLIKE(_string_index, _va_index) \ +#define TK_PRINTFLIKE(_string_index, _va_index) \ __attribute__((format(printf, _string_index, _va_index))) #endif @@ -122,8 +122,12 @@ typedef enum { * @param[in] length Message string length. */ inline void TK_INTERNAL_PLATFORM_WEAKNESS tk_pal_emit_log_message( - tk_pal_log_level_t level, const char *filename, const char *function, - size_t line, const char *message, size_t length) { + tk_pal_log_level_t level, + const char* filename, + const char* function, + size_t line, + const char* message, + size_t length) { // Use a format similar to glog and folly::logging, except: // - Print time since et_pal_init since we don't have wall time // - Don't include the thread ID, to avoid adding a threading dependency @@ -131,8 +135,13 @@ inline void TK_INTERNAL_PLATFORM_WEAKNESS tk_pal_emit_log_message( // // Clients who want to change the format or add other fields can override this // weak implementation of et_pal_emit_log_message. - fprintf(TK_LOG_OUTPUT_FILE, "%c tokenizers:%s:%zu] %s\n", level, filename, - line, message); + fprintf( + TK_LOG_OUTPUT_FILE, + "%c tokenizers:%s:%zu] %s\n", + level, + filename, + line, + message); fflush(TK_LOG_OUTPUT_FILE); } @@ -203,8 +212,13 @@ static constexpr tk_pal_log_level_t kLevelToPal[size_t(LogLevel::NumLevels)] = { * @param[in] args Variable argument list. */ TK_PRINTFLIKE(5, 0) -inline void vlogf(LogLevel level, const char *filename, const char *function, - size_t line, const char *format, va_list args) { +inline void vlogf( + LogLevel level, + const char* filename, + const char* function, + size_t line, + const char* format, + va_list args) { // Maximum length of a log message. static constexpr size_t kMaxLogMessageLength = 256; char buf[kMaxLogMessageLength]; @@ -217,8 +231,8 @@ inline void vlogf(LogLevel level, const char *filename, const char *function, tk_pal_log_level_t pal_level = (int(level) >= 0 && level < LogLevel::NumLevels) - ? kLevelToPal[size_t(level)] - : tk_pal_log_level_t::kUnknown; + ? kLevelToPal[size_t(level)] + : tk_pal_log_level_t::kUnknown; tk_pal_emit_log_message(pal_level, filename, function, line, buf, len); } @@ -235,8 +249,13 @@ inline void vlogf(LogLevel level, const char *filename, const char *function, * @param[in] format Format string. */ TK_PRINTFLIKE(5, 6) -inline void logf(LogLevel level, const char *filename, const char *function, - size_t line, const char *format, ...) { +inline void logf( + LogLevel level, + const char* filename, + const char* function, + size_t line, + const char* format, + ...) { #if TK_LOG_ENABLED va_list args; va_start(args, format); @@ -257,14 +276,19 @@ inline void logf(LogLevel level, const char *filename, const char *function, * @param[in] _level Log severity level. * @param[in] _format Log message format string. */ -#define TK_LOG(_level, _format, ...) \ - do { \ - const auto _log_level = ::tokenizers::LogLevel::_level; \ - if (static_cast(_log_level) >= \ - static_cast(::tokenizers::LogLevel::TK_MIN_LOG_LEVEL)) { \ - ::tokenizers::internal::logf(_log_level, TK_SHORT_FILENAME, TK_FUNCTION, \ - TK_LINE, _format, ##__VA_ARGS__); \ - } \ +#define TK_LOG(_level, _format, ...) \ + do { \ + const auto _log_level = ::tokenizers::LogLevel::_level; \ + if (static_cast(_log_level) >= \ + static_cast(::tokenizers::LogLevel::TK_MIN_LOG_LEVEL)) { \ + ::tokenizers::internal::logf( \ + _log_level, \ + TK_SHORT_FILENAME, \ + TK_FUNCTION, \ + TK_LINE, \ + _format, \ + ##__VA_ARGS__); \ + } \ } while (0) #else // TK_LOG_ENABLED diff --git a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/map_utils.h b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/map_utils.h index 647b0c071e..42a049ef8d 100644 --- a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/map_utils.h +++ b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/map_utils.h @@ -28,49 +28,62 @@ namespace detail { // Hash function for std::pair struct PairHash { - std::size_t operator()(const std::pair &p) const { + std::size_t operator()(const std::pair& p) const { return std::hash{}(p.first) ^ - (std::hash{}(p.second) << 1); + (std::hash{}(p.second) << 1); } }; // Type alias for BPE merge map: (token_id_1, token_id_2) -> (rank, // merged_token_id) -using MergeMap = std::unordered_map, - std::pair, PairHash>; +using MergeMap = std::unordered_map< + std::pair, + std::pair, + PairHash>; using TokenMap = StringIntegerMap<>; template -static Result -build_token_map(std::vector> container) { - static_assert(std::is_same_v || - std::is_same_v, - "TToken must be std::string or std::string_view"); - static_assert(std::is_integral_v && std::is_unsigned_v, - "TRank must be an unsigned integer"); - - std::sort(container.begin(), container.end(), - [](const auto &a, const auto &b) { return a.first < b.first; }); +static Result build_token_map( + std::vector> container) { + static_assert( + std::is_same_v || + std::is_same_v, + "TToken must be std::string or std::string_view"); + static_assert( + std::is_integral_v && std::is_unsigned_v, + "TRank must be an unsigned integer"); + + std::sort( + container.begin(), container.end(), [](const auto& a, const auto& b) { + return a.first < b.first; + }); auto duplicate_begin = std::unique( - container.begin(), container.end(), - [](const auto &a, const auto &b) { return a.first == b.first; }); + container.begin(), container.end(), [](const auto& a, const auto& b) { + return a.first == b.first; + }); TK_CHECK_OR_RETURN_ERROR( - duplicate_begin == container.end(), ParseFailure, - "duplicate token: %s rank: %llu", duplicate_begin->first.c_str(), + duplicate_begin == container.end(), + ParseFailure, + "duplicate token: %s rank: %llu", + duplicate_begin->first.c_str(), static_cast(duplicate_begin->second)); - std::sort(container.begin(), container.end(), - [](const auto &a, const auto &b) { return a.second < b.second; }); + std::sort( + container.begin(), container.end(), [](const auto& a, const auto& b) { + return a.second < b.second; + }); duplicate_begin = std::unique( - container.begin(), container.end(), - [](const auto &a, const auto &b) { return a.second == b.second; }); + container.begin(), container.end(), [](const auto& a, const auto& b) { + return a.second == b.second; + }); TK_CHECK_OR_RETURN_ERROR( - duplicate_begin == container.end(), ParseFailure, + duplicate_begin == container.end(), + ParseFailure, "duplicate rank: %llu" " token: %s", static_cast(duplicate_begin->second), @@ -80,21 +93,24 @@ build_token_map(std::vector> container) { }; template -static Result build_token_map(const TContainer &container, - TTokenAccessor token_accessor, - TRankAccessor rank_accessor) { - using TokenType = std::invoke_result_t; - using RankType = std::invoke_result_t; - - static_assert(std::is_same_v || - std::is_same_v, - "TokenType must be std::string or std::string_view"); - static_assert(std::is_integral_v && std::is_unsigned_v, - "RankType must be an unsigned integer"); +static Result build_token_map( + const TContainer& container, + TTokenAccessor token_accessor, + TRankAccessor rank_accessor) { + using TokenType = std::invoke_result_t; + using RankType = std::invoke_result_t; + + static_assert( + std::is_same_v || + std::is_same_v, + "TokenType must be std::string or std::string_view"); + static_assert( + std::is_integral_v && std::is_unsigned_v, + "RankType must be an unsigned integer"); std::vector> pairs; pairs.reserve(container.size()); - for (const auto &value : container) { + for (const auto& value : container) { pairs.emplace_back(token_accessor(value), rank_accessor(value)); } @@ -103,22 +119,25 @@ static Result build_token_map(const TContainer &container, // Utility function to build merge ranks map from merge rules template -inline Result build_merge_ranks_map(const TMergeMap &merge_map, - const TokenMap &token_map) { +inline Result build_merge_ranks_map( + const TMergeMap& merge_map, + const TokenMap& token_map) { // Static assertions to verify TMergeMap has the expected key and value types using KeyType = typename TMergeMap::key_type; using ValueType = typename TMergeMap::mapped_type; - static_assert(std::is_same_v>, - "TMergeMap key type must be std::pair"); + static_assert( + std::is_same_v>, + "TMergeMap key type must be std::pair"); - static_assert(std::is_same_v>, - "TMergeMap value type must be std::pair"); + static_assert( + std::is_same_v>, + "TMergeMap value type must be std::pair"); // Use a map to handle duplicates - keep the lowest rank (highest priority) std::unordered_map unique_merge_ranks; - for (const auto &[pair, rank_and_id] : merge_map) { + for (const auto& [pair, rank_and_id] : merge_map) { uint64_t first_id = pair.first; uint64_t second_id = pair.second; uint64_t rank = rank_and_id.first; @@ -143,20 +162,20 @@ inline Result build_merge_ranks_map(const TMergeMap &merge_map, std::vector> merge_rank_pairs; merge_rank_pairs.reserve(unique_merge_ranks.size()); - for (const auto &[token, rank] : unique_merge_ranks) { + for (const auto& [token, rank] : unique_merge_ranks) { merge_rank_pairs.emplace_back(token, rank); } return build_token_map(std::move(merge_rank_pairs)); } -inline Result> -build_special_token_regex(const TokenMap &special_token_map) { +inline Result> build_special_token_regex( + const TokenMap& special_token_map) { std::string special_pattern; const std::size_t count = special_token_map.size(); for (std::size_t i = 0; i < count; ++i) { - const auto &[token, _] = special_token_map.getElement(i); + const auto& [token, _] = special_token_map.getElement(i); if (!special_pattern.empty()) { special_pattern += "|"; } @@ -170,5 +189,29 @@ build_special_token_regex(const TokenMap &special_token_map) { return create_regex("(" + special_pattern + ")"); } +/// Shared implementation: split input around the first regex-matched token +/// that exists in the token map. Used by all Model implementations. +inline std::pair, std::string> +split_with_special_token( + const IRegex* regex, + const TokenMap& token_map, + const std::string& input, + size_t offset) { + if (!regex) { + return {std::nullopt, input.substr(offset)}; + } + + auto matches = regex->find_all(input.substr(offset)); + + for (const auto& m : matches) { + std::string matched_text = input.substr(offset + m.start, m.end - m.start); + if (token_map.tryGetInteger(matched_text).has_value()) { + return {matched_text, input.substr(offset, m.start)}; + } + } + + return {std::nullopt, input.substr(offset)}; +} + } // namespace detail } // namespace tokenizers diff --git a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/model.h b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/model.h index 7edd078be6..4bb24ea8d7 100644 --- a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/model.h +++ b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/model.h @@ -10,7 +10,9 @@ #pragma once #include +#include #include +#include #include #include @@ -33,8 +35,8 @@ namespace tokenizers { * Unigram). */ class Model { -public: - using Ptr = std::shared_ptr; + public: + using Ptr = std::unique_ptr; virtual ~Model() = default; @@ -44,8 +46,8 @@ class Model { * @param piece The input string to tokenize. * @return A Result containing the vector of token IDs. */ - virtual Result> - tokenize(const std::string &piece) const = 0; + virtual Result> tokenize( + const std::string& piece) const = 0; /** * Converts a token ID to its string representation. @@ -61,7 +63,7 @@ class Model { * @param piece The string representation of the token. * @return A Result containing the token ID. */ - virtual Result piece_to_id(const std::string &piece) const = 0; + virtual Result piece_to_id(const std::string& piece) const = 0; /** * Returns the size of the vocabulary. @@ -86,42 +88,64 @@ class Model { virtual bool is_loaded() const = 0; /** - * Helper to split input text into a special token and the preceding regular - * text. - * + * Helper to split input text into a special token and the preceding regular text. + * * @param input The input string. * @param offset The starting offset. * @return A pair of (matched special token string, preceding regular text). */ virtual std::pair, std::string> - split_with_allowed_special_token(const std::string &input, - size_t offset) const = 0; + split_with_allowed_special_token(const std::string& input, size_t offset) + const = 0; virtual uint64_t bos_token_id() const = 0; virtual uint64_t eos_token_id() const = 0; + + virtual bool special_token_has_rstrip(const std::string& token) const { + return false; + } + virtual bool special_token_has_lstrip(const std::string& token) const { + return false; + } +}; + +// -- Shared types ------------------------------------------------------------- + +/// Resolved BOS/EOS/UNK token IDs, produced by resolve_sequence_tokens(). +struct SequenceTokenIds { + std::optional unk_token_id; + std::optional bos_token_id; + std::optional eos_token_id; }; // -- Factory ------------------------------------------------------------------ // Helper macro to standardize addition of config member fields -#define MODEL_CONFIG_MEMBER(type, name) \ - std::optional name; \ - ModelConfig &set_##name(type arg) { \ - this->name = std::move(arg); \ - return *this; \ +#define MODEL_CONFIG_MEMBER(type, name) \ + std::optional name; \ + ModelConfig& set_##name(type arg) { \ + this->name = std::move(arg); \ + return *this; \ } /** * Factory and config class for creating a new Model */ class ModelConfig { -public: + public: std::string type; // Data for BPEModel using TokenPairs = std::vector>; MODEL_CONFIG_MEMBER(TokenPairs, token_pairs) MODEL_CONFIG_MEMBER(TokenPairs, special_token_pairs) + // All added_tokens (special + non-special) — used for encoding regex only. + MODEL_CONFIG_MEMBER(TokenPairs, all_added_token_pairs) + + // Tokens with rstrip=true consume leading whitespace after the token; + // tokens with lstrip=true consume trailing whitespace before the token. + std::unordered_set rstrip_tokens; + std::unordered_set lstrip_tokens; MODEL_CONFIG_MEMBER(std::vector, merges) MODEL_CONFIG_MEMBER(bool, byte_fallback) @@ -131,6 +155,11 @@ class ModelConfig { MODEL_CONFIG_MEMBER(std::string, continuing_subword_prefix) MODEL_CONFIG_MEMBER(size_t, max_input_chars_per_word) + // Data for UnigramModel + using UnigramVocab = std::vector>; + MODEL_CONFIG_MEMBER(UnigramVocab, unigram_vocab) + MODEL_CONFIG_MEMBER(size_t, unigram_unk_id) + // Paths for extra config files (HuggingFace specific) MODEL_CONFIG_MEMBER(std::string, model_config_path) MODEL_CONFIG_MEMBER(std::string, special_tokens_map_path) @@ -140,12 +169,33 @@ class ModelConfig { /** * Populate from a json config file (the root tokenizer.json) */ - ModelConfig &parse_json(const nlohmann::json &json_config); + ModelConfig& parse_json(const nlohmann::json& json_config); /** * Construct the model instance from the member data */ Model::Ptr create() const; + + private: + // Per-type factory helpers called by create(). + Model::Ptr create_bpe( + detail::TokenMap token_map, + detail::TokenMap special_token_map, + std::unique_ptr regex, + const struct SequenceTokenIds& ids) const; + Model::Ptr create_wordpiece( + detail::TokenMap token_map, + detail::TokenMap special_token_map, + const struct SequenceTokenIds& ids) const; + Model::Ptr create_unigram( + detail::TokenMap special_token_map, + std::unique_ptr regex, + const struct SequenceTokenIds& ids) const; + Model::Ptr create_wordlevel( + detail::TokenMap token_map, + detail::TokenMap special_token_map, + std::unique_ptr regex, + const struct SequenceTokenIds& ids) const; }; } // namespace tokenizers \ No newline at end of file diff --git a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/normalizer.h b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/normalizer.h index 8b649e35d2..9a71ae7a7d 100644 --- a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/normalizer.h +++ b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/normalizer.h @@ -26,242 +26,217 @@ namespace tokenizers { // -- Base --------------------------------------------------------------------- -/** - * Base class for all normalizers with a single virtual method to normalize the - * input string - */ class Normalizer { -public: - /** Shared pointer type */ - typedef std::shared_ptr Ptr; - - /** Normalize the input string - * - * This normalization may result in a string that is different from the - * original input, therefore the resulting string will be owned by the caller. - * - * NOTE: Pass by value per best practice - * https://abseil.io/docs/cpp/guides/strings#string_view - */ - virtual std::string normalize(const std::string &input) const = 0; + public: + using Ptr = std::unique_ptr; + virtual std::string normalize(const std::string& input) const = 0; virtual ~Normalizer() = default; -}; // end class Normalizer +}; // -- Factory ------------------------------------------------------------------ -// Helper macro to standardize addition of config member fields -#define NORMALIZER_CONFIG_MEMBER(type, name) \ - std::optional name; \ - NormalizerConfig &set_##name(type arg) { \ - this->name = std::move(arg); \ - return *this; \ +#define NORMALIZER_CONFIG_MEMBER(type, name) \ + std::optional name; \ + NormalizerConfig& set_##name(type arg) { \ + this->name = std::move(arg); \ + return *this; \ } -/** - * Factory and config class for creating a new Normalizer - * - * This class is the central method for instantiating a Normalizer instance. - * It contains the common construction logic and config parameter names for all - * normalizer constructor args. - * - * NOTE: When adding a new normalizer, you must ensure its arguments are - * added to this class and it's constructor is added in the implementation! - * - * Usage Example: - * - * const auto normalizer = NormalizerConfig("Replace") - * .set_pattern(" ") - * .set_content("▁") - * .create(); - * const auto normalized = normalizer->normalize("Hello World!"); - */ class NormalizerConfig { -public: - /*------------------------*/ - /* Public mutable members */ - /*------------------------*/ - - /** - * The Type name string matching from tokenizers - * https://github.com/huggingface/tokenizers/blob/main/tokenizers/src/normalizers/mod.rs - */ + public: std::string type; - /** - * Used by: ReplaceNormalizer - */ + // Replace NORMALIZER_CONFIG_MEMBER(std::string, pattern) - - /** - * Used by: ReplaceNormalizer - */ NORMALIZER_CONFIG_MEMBER(std::string, content) - /** - * Used by: SequenceNormalizer - */ + // Sequence using Configs = std::vector; NORMALIZER_CONFIG_MEMBER(Configs, normalizers) - /** - * Used by: PrependNormalizer - */ + // Prepend NORMALIZER_CONFIG_MEMBER(std::string, prepend) - /** - * Used by: BertNormalizer - */ + // BertNormalizer NORMALIZER_CONFIG_MEMBER(bool, clean_text) NORMALIZER_CONFIG_MEMBER(bool, handle_chinese_chars) NORMALIZER_CONFIG_MEMBER(bool, lowercase) NORMALIZER_CONFIG_MEMBER(bool, strip_accents) - /*----------------*/ - /* Public methods */ - /*----------------*/ + // Strip + NORMALIZER_CONFIG_MEMBER(bool, strip_left) + NORMALIZER_CONFIG_MEMBER(bool, strip_right) - /** - * Construct with the type - */ - explicit NormalizerConfig(std::string type = ""); + // Precompiled + NORMALIZER_CONFIG_MEMBER(std::string, precompiled_charsmap) - /** - * Construct the normalizer instance from the member data - */ + explicit NormalizerConfig(std::string type = ""); Normalizer::Ptr create() const; - - /** - * Populate from a json config file - */ - NormalizerConfig &parse_json(const nlohmann::json &json_config); - -}; // end class NormalizerConfig + NormalizerConfig& parse_json(const nlohmann::json& json_config); +}; // -- Replace ------------------------------------------------------------------ -// Used for general-purpose string replacement normalization -// CITE: -// https://github.com/huggingface/tokenizers/blob/main/tokenizers/src/normalizers/replace.rs class ReplaceNormalizer : public Normalizer { -public: - /** - * @param pattern: The pattern to search for (can be a string or regex) - * @param content: The replacement content - */ - explicit ReplaceNormalizer(const std::string &pattern, - const std::string &content) + public: + explicit ReplaceNormalizer( + const std::string& pattern, + const std::string& content) : regex_(ReplaceNormalizer::create_regex_(pattern)), content_(content) {} - /** Normalize with the stored pattern replacement */ - std::string normalize(const std::string &input) const override; - -protected: - static std::unique_ptr create_regex_(const std::string &pattern); + std::string normalize(const std::string& input) const override; + protected: + static std::unique_ptr create_regex_(const std::string& pattern); std::unique_ptr regex_; const std::string content_; - -}; // end class ReplaceNormalizer +}; // -- Prepend ------------------------------------------------------------------ -// Used to prepend a string to the input -// CITE: -// https://github.com/huggingface/tokenizers/blob/main/tokenizers/src/normalizers/prepend.rs class PrependNormalizer : public Normalizer { -public: - /** - * @param prepend: The string to prepend - */ - explicit PrependNormalizer(const std::string &prepend) : prepend_(prepend) {} + public: + explicit PrependNormalizer(const std::string& prepend) : prepend_(prepend) {} + std::string normalize(const std::string& input) const override; - /** Prepend the stored string */ - std::string normalize(const std::string &input) const override; - -protected: + protected: const std::string prepend_; - -}; // end class PrependNormalizer +}; // -- Sequence ----------------------------------------------------------------- -// Used by tokenizers -// CITE: -// https://github.com/huggingface/tokenizers/blob/main/tokenizers/src/normalizers/sequence.rs class SequenceNormalizer : public Normalizer { -public: - /** - * @param normalizers: The sequence of owned normalizer objects to use - */ + public: explicit SequenceNormalizer(std::vector normalizers); + std::string normalize(const std::string& input) const override; - /** Perform normalization */ - std::string normalize(const std::string &input) const override; - -private: + private: const std::vector normalizers_; - -}; // end class SequenceNormalizer +}; // -- NFC ---------------------------------------------------------------------- -// Used for Unicode NFC (Normalization Form Canonical Composition) normalization -// CITE: -// https://github.com/huggingface/tokenizers/blob/main/tokenizers/src/normalizers/unicode.rs class NFCNormalizer : public Normalizer { -public: - /** Default constructor */ - explicit NFCNormalizer() = default; + public: + NFCNormalizer() = default; + std::string normalize(const std::string& input) const override; +}; - /** Normalize with NFC Unicode normalization */ - std::string normalize(const std::string &input) const override; +// -- NFD ---------------------------------------------------------------------- -}; // end class NFCNormalizer +class NFDNormalizer : public Normalizer { + public: + NFDNormalizer() = default; + std::string normalize(const std::string& input) const override; +}; -// -- Lowercase ---------------------------------------------------------------- -// Used for lowercasing the input -// CITE: -// https://github.com/huggingface/tokenizers/blob/main/tokenizers/src/normalizers/utils.rs +// -- NFKC --------------------------------------------------------------------- +// Note: Full NFKC requires compatibility decomposition data not available in +// our Unicode library. We approximate with NFC which handles the majority of +// real-world cases. For models that truly need NFKC (rare), consider the +// Precompiled normalizer which embeds the exact rules. -class LowercaseNormalizer : public Normalizer { -public: - /** Default constructor */ - explicit LowercaseNormalizer() = default; +class NFKCNormalizer : public Normalizer { + public: + NFKCNormalizer() = default; + std::string normalize(const std::string& input) const override; +}; - /** Lowercase the input */ - std::string normalize(const std::string &input) const override; +// -- NFKD --------------------------------------------------------------------- +// Same caveat as NFKC — approximated with NFD. -}; // end class LowercaseNormalizer +class NFKDNormalizer : public Normalizer { + public: + NFKDNormalizer() = default; + std::string normalize(const std::string& input) const override; +}; -// -- Bert --------------------------------------------------------------------- -// Used for BERT-style normalization (cleaning, lowercasing, accent removal) -// CITE: -// https://github.com/huggingface/tokenizers/blob/main/tokenizers/src/normalizers/bert.rs +// -- Lowercase ---------------------------------------------------------------- + +class LowercaseNormalizer : public Normalizer { + public: + LowercaseNormalizer() = default; + std::string normalize(const std::string& input) const override; +}; + +// -- BertNormalizer ----------------------------------------------------------- class BertNormalizer : public Normalizer { -public: - /** - * @param clean_text: Whether to clean the text (remove control chars, etc.) - * @param handle_chinese_chars: Whether to put spaces around Chinese - * characters - * @param lowercase: Whether to lowercase the input - * @param strip_accents: Whether to strip accents (optional, usually follows - * lowercase) - */ - explicit BertNormalizer(bool clean_text, bool handle_chinese_chars, - bool lowercase, std::optional strip_accents) - : clean_text_(clean_text), handle_chinese_chars_(handle_chinese_chars), - lowercase_(lowercase), strip_accents_(strip_accents) {} - - /** Perform BERT normalization steps */ - std::string normalize(const std::string &input) const override; - -protected: + public: + explicit BertNormalizer( + bool clean_text, + bool handle_chinese_chars, + bool lowercase, + std::optional strip_accents) + : clean_text_(clean_text), + handle_chinese_chars_(handle_chinese_chars), + lowercase_(lowercase), + strip_accents_(strip_accents) {} + + std::string normalize(const std::string& input) const override; + + protected: const bool clean_text_; const bool handle_chinese_chars_; const bool lowercase_; const std::optional strip_accents_; }; +// -- Strip -------------------------------------------------------------------- +// Strips leading/trailing whitespace. + +class StripNormalizer : public Normalizer { + public: + explicit StripNormalizer(bool strip_left = true, bool strip_right = true) + : strip_left_(strip_left), strip_right_(strip_right) {} + std::string normalize(const std::string& input) const override; + + private: + const bool strip_left_; + const bool strip_right_; +}; + +// -- StripAccents ------------------------------------------------------------- +// Removes combining diacritical marks (accents) via NFD decomposition. + +class StripAccentsNormalizer : public Normalizer { + public: + StripAccentsNormalizer() = default; + std::string normalize(const std::string& input) const override; +}; + +// -- Nmt ---------------------------------------------------------------------- +// Normalizes whitespace and control characters (NMT-style). + +class NmtNormalizer : public Normalizer { + public: + NmtNormalizer() = default; + std::string normalize(const std::string& input) const override; +}; + +// -- ByteLevel ---------------------------------------------------------------- +// Maps each byte to a visible UTF-8 character (GPT-2 style byte encoding). + +class ByteLevelNormalizer : public Normalizer { + public: + ByteLevelNormalizer() = default; + std::string normalize(const std::string& input) const override; +}; + +// -- Precompiled -------------------------------------------------------------- +// Uses SentencePiece's precompiled charsmap (double-array trie) for +// normalization. The binary blob is stored base64-encoded in tokenizer.json. + +class PrecompiledNormalizer : public Normalizer { + public: + explicit PrecompiledNormalizer(const std::string& precompiled_charsmap); + ~PrecompiledNormalizer() override; + std::string normalize(const std::string& input) const override; + + private: + struct Impl; + std::unique_ptr impl_; +}; + } // namespace tokenizers diff --git a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/padding.h b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/padding.h index d94bc6d0dd..68694ae648 100644 --- a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/padding.h +++ b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/padding.h @@ -43,14 +43,13 @@ struct PaddingParams { }; class Padding { -public: - /** Shared pointer type */ - typedef std::shared_ptr Ptr; + public: + using Ptr = std::unique_ptr; /** * @param params: The padding parameters */ - explicit Padding(const PaddingParams ¶ms); + explicit Padding(const PaddingParams& params); /** * Pad the tokens according to the configuration @@ -61,24 +60,25 @@ class Padding { * Generate attention mask for the padded tokens. * 1 for real tokens, 0 for padded tokens. */ - std::vector generate_mask(const std::vector &tokens, - size_t padded_size) const; + std::vector generate_mask( + const std::vector& tokens, + size_t padded_size) const; -private: + private: PaddingParams params_; }; // -- Factory ------------------------------------------------------------------ // Helper macro to standardize addition of config member fields -#define PADDING_CONFIG_MEMBER(type, name) \ - PaddingConfig &set_##name(type arg) { \ - this->params.name = std::move(arg); \ - return *this; \ +#define PADDING_CONFIG_MEMBER(type, name) \ + PaddingConfig& set_##name(type arg) { \ + this->params.name = std::move(arg); \ + return *this; \ } class PaddingConfig { -public: + public: explicit PaddingConfig(std::string strategy = ""); /** @@ -89,15 +89,15 @@ class PaddingConfig { /** * Populate from a json config file */ - PaddingConfig &parse_json(const nlohmann::json &json_config); + PaddingConfig& parse_json(const nlohmann::json& json_config); // Configuration members PaddingParams params; PADDING_CONFIG_MEMBER(PaddingStrategy, strategy) PADDING_CONFIG_MEMBER(PaddingDirection, direction) - - PaddingConfig &set_fixed_size(std::optional arg) { + + PaddingConfig& set_fixed_size(std::optional arg) { this->params.fixed_size = std::move(arg); this->params.strategy = PaddingStrategy::Fixed; return *this; diff --git a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/pcre2_regex.h b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/pcre2_regex.h index 87a4fb8a98..4752307939 100644 --- a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/pcre2_regex.h +++ b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/pcre2_regex.h @@ -23,7 +23,7 @@ namespace tokenizers { * @brief PCRE2-based implementation of IRegex. */ class Pcre2Regex : public IRegex { -public: + public: /** * @brief Construct a PCRE2 regex. */ @@ -34,7 +34,7 @@ class Pcre2Regex : public IRegex { * @param pattern The regex pattern to compile. * @return An Error object indicating success or failure of the compilation. */ - virtual Error compile(const std::string &pattern) override; + virtual Error compile(const std::string& pattern) override; /** * @brief Destructor to clean up PCRE2 resources. @@ -44,11 +44,11 @@ class Pcre2Regex : public IRegex { /** * @brief Return all non-overlapping matches found in the input string. */ - virtual std::vector find_all(const std::string &text) const override; + virtual std::vector find_all(const std::string& text) const override; -private: - pcre2_code *regex_; - pcre2_match_data *match_data_; + private: + pcre2_code* regex_; + pcre2_match_data* match_data_; }; } // namespace tokenizers diff --git a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/post_processor.h b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/post_processor.h index bbda41db35..9bc89423da 100644 --- a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/post_processor.h +++ b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/post_processor.h @@ -25,9 +25,8 @@ namespace tokenizers { * Base class for all post-processors */ class PostProcessor { -public: - /** Shared pointer type */ - using Ptr = std::shared_ptr; + public: + using Ptr = std::unique_ptr; virtual ~PostProcessor() = default; @@ -43,27 +42,27 @@ class PostProcessor { * taking Encoding and an Option, we use overloads here * to explicitly handle single vs pair sequences while processing raw IDs. */ - virtual std::vector - process(const std::vector &tokens, - bool add_special_tokens = true) const = 0; + virtual std::vector process( + const std::vector& tokens, + bool add_special_tokens = true) const = 0; /** * Process the token IDs (pair sequence). */ - virtual std::vector - process(const std::vector &tokens_a, - const std::vector &tokens_b, - bool add_special_tokens = true) const = 0; + virtual std::vector process( + const std::vector& tokens_a, + const std::vector& tokens_b, + bool add_special_tokens = true) const = 0; }; // -- Factory/Common Types ----------------------------------------------------- // Helper macro to standardize addition of config member fields -#define POST_PROCESSOR_CONFIG_MEMBER(type, name) \ - std::optional name; \ - PostProcessorConfig &set_##name(type arg) { \ - this->name = std::move(arg); \ - return *this; \ +#define POST_PROCESSOR_CONFIG_MEMBER(type, name) \ + std::optional name; \ + PostProcessorConfig& set_##name(type arg) { \ + this->name = std::move(arg); \ + return *this; \ } enum class SequenceId { A, B }; @@ -91,7 +90,7 @@ using Template = std::vector; // -- Config ------------------------------------------------------------------- class PostProcessorConfig { -public: + public: using SpecialTokenMap = std::map; using StringIdPair = std::pair; @@ -116,72 +115,82 @@ class PostProcessorConfig { PostProcessor::Ptr create() const; - PostProcessorConfig &parse_json(const nlohmann::json &json_config); + PostProcessorConfig& parse_json(const nlohmann::json& json_config); }; // -- TemplateProcessing ------------------------------------------------------- class TemplateProcessing : public PostProcessor { -public: - TemplateProcessing(Template single, Template pair, - std::map special_tokens); + public: + TemplateProcessing( + Template single, + Template pair, + std::map special_tokens); size_t added_tokens(bool is_pair) const override; - std::vector process(const std::vector &tokens, - bool add_special_tokens = true) const override; + std::vector process( + const std::vector& tokens, + bool add_special_tokens = true) const override; - std::vector process(const std::vector &tokens_a, - const std::vector &tokens_b, - bool add_special_tokens = true) const override; + std::vector process( + const std::vector& tokens_a, + const std::vector& tokens_b, + bool add_special_tokens = true) const override; -private: + private: Template single_; Template pair_; std::map special_tokens_; size_t added_single_; size_t added_pair_; - std::vector apply_template(const Template &tmpl, - const std::vector &tokens_a, - const std::vector *tokens_b, - bool add_special_tokens) const; + std::vector apply_template( + const Template& tmpl, + const std::vector& tokens_a, + const std::vector* tokens_b, + bool add_special_tokens) const; }; class Sequence : public PostProcessor { -public: + public: explicit Sequence(std::vector processors); size_t added_tokens(bool is_pair) const override; - std::vector process(const std::vector &tokens, - bool add_special_tokens = true) const override; + std::vector process( + const std::vector& tokens, + bool add_special_tokens = true) const override; - std::vector process(const std::vector &tokens_a, - const std::vector &tokens_b, - bool add_special_tokens = true) const override; + std::vector process( + const std::vector& tokens_a, + const std::vector& tokens_b, + bool add_special_tokens = true) const override; -private: + private: std::vector processors_; }; // -- BertProcessing ----------------------------------------------------------- // Used for BERT post-processing (adding special tokens) class BertProcessing : public PostProcessor { -public: - BertProcessing(std::pair sep, - std::pair cls); + public: + BertProcessing( + std::pair sep, + std::pair cls); size_t added_tokens(bool is_pair) const override; - std::vector process(const std::vector &tokens, - bool add_special_tokens = true) const override; + std::vector process( + const std::vector& tokens, + bool add_special_tokens = true) const override; - std::vector process(const std::vector &tokens_a, - const std::vector &tokens_b, - bool add_special_tokens = true) const override; + std::vector process( + const std::vector& tokens_a, + const std::vector& tokens_b, + bool add_special_tokens = true) const override; -private: + private: std::pair sep_; std::pair cls_; }; @@ -189,21 +198,25 @@ class BertProcessing : public PostProcessor { // -- RobertaProcessing -------------------------------------------------------- // Used for RoBERTa post-processing class RobertaProcessing : public PostProcessor { -public: - RobertaProcessing(std::pair sep, - std::pair cls, bool trim_offsets, - bool add_prefix_space); + public: + RobertaProcessing( + std::pair sep, + std::pair cls, + bool trim_offsets, + bool add_prefix_space); size_t added_tokens(bool is_pair) const override; - std::vector process(const std::vector &tokens, - bool add_special_tokens = true) const override; + std::vector process( + const std::vector& tokens, + bool add_special_tokens = true) const override; - std::vector process(const std::vector &tokens_a, - const std::vector &tokens_b, - bool add_special_tokens = true) const override; + std::vector process( + const std::vector& tokens_a, + const std::vector& tokens_b, + bool add_special_tokens = true) const override; -private: + private: std::pair sep_; std::pair cls_; bool trim_offsets_; diff --git a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/pre_tokenizer.h b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/pre_tokenizer.h index a6b2cce1b8..8fe6e8b96f 100644 --- a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/pre_tokenizer.h +++ b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/pre_tokenizer.h @@ -26,263 +26,238 @@ namespace tokenizers { // -- Base --------------------------------------------------------------------- -/** - * Base class for all pre-tokenizers with a single virtual method to split the - * input string piece - */ class PreTokenizer { -public: - /** Shared pointer type */ - typedef std::shared_ptr Ptr; - - /** Split the input string piece into sub-pieces - * - * This pre-tokenization may result in sub-pieces that are not contained - * within the original input, therefore the resulting pieces will be owned by - * the caller. - * - * NOTE: Pass by value per best practice - * https://abseil.io/docs/cpp/guides/strings#string_view - */ - virtual std::vector - pre_tokenize(const std::string &input) const = 0; + public: + using Ptr = std::unique_ptr; + + virtual std::vector pre_tokenize( + const std::string& input) const = 0; virtual ~PreTokenizer() = default; -}; // end class PreTokenizer +}; // -- Factory ------------------------------------------------------------------ -// Helper macro to standardize addition of config member fields -#define PRETOKENIZER_CONFIG_MEMBER(type, name) \ - std::optional name; \ - PreTokenizerConfig &set_##name(type arg) { \ - this->name = std::move(arg); \ - return *this; \ +#define PRETOKENIZER_CONFIG_MEMBER(type, name) \ + std::optional name; \ + PreTokenizerConfig& set_##name(type arg) { \ + this->name = std::move(arg); \ + return *this; \ } -/** - * Factory and config class for creating a new PreTokenizer - * - * This class is the central method for instantiating a PreTokenizer instance. - * It contains the common construction logic and config parameter names for all - * pre tokenizer constructor args. - * - * NOTE: When adding a new pre tokenizer, you must ensure its arguments are - * added to this class and it's constructor is added in the implementation! - * - * Usage Example: - * - * const auto pre_tokenizer = PreTokenizerConfig("Sequence").set_pretokenizers( - * {PreTokenizerConfig("Digits"), PreTokenizerConfig("ByteLevel")} - * ); - * const auto pre_tokenized = pre_tokenizer->pre_tokenize("Hello World!"); - */ class PreTokenizerConfig { -public: - /*------------------------*/ - /* Public mutable members */ - /*------------------------*/ - - /** - * The Type name string matching from tokenizers - * https://github.com/huggingface/tokenizers/blob/main/tokenizers/src/pre_tokenizers/mod.rs#L73 - */ + public: std::string type; - /** - * Used by: RegexPreTokenizer, ByteLevelPreTokenizer - */ + // Split / RegexPreTokenizer PRETOKENIZER_CONFIG_MEMBER(std::string, pattern) + PRETOKENIZER_CONFIG_MEMBER(bool, is_delimiter) + PRETOKENIZER_CONFIG_MEMBER(std::string, behavior) + PRETOKENIZER_CONFIG_MEMBER(bool, invert) - /** - * Used by: DigitsPreTokenizer - */ + // Digits PRETOKENIZER_CONFIG_MEMBER(bool, individual_digits) - /** - * Used by: ByteLevelPreTokenizer - */ + // ByteLevel PRETOKENIZER_CONFIG_MEMBER(bool, add_prefix_space) - - /** - * Used by: ByteLevelPreTokenizer - */ PRETOKENIZER_CONFIG_MEMBER(bool, use_regex) - /** - * Used by RegexPreTokenizer - */ - PRETOKENIZER_CONFIG_MEMBER(bool, is_delimiter) + // Metaspace + PRETOKENIZER_CONFIG_MEMBER(std::string, replacement) + PRETOKENIZER_CONFIG_MEMBER(std::string, prepend_scheme) + PRETOKENIZER_CONFIG_MEMBER(bool, split) - /** - * Used by RegexPreTokenizer - Split behavior - */ - PRETOKENIZER_CONFIG_MEMBER(std::string, behavior) + // CharDelimiterSplit + PRETOKENIZER_CONFIG_MEMBER(std::string, delimiter) - /** - * Used by RegexPreTokenizer - Split invert flag - */ - PRETOKENIZER_CONFIG_MEMBER(bool, invert) + // Punctuation — reuses `behavior` + + // FixedLength + PRETOKENIZER_CONFIG_MEMBER(size_t, length) - /** - * Used by: SequencePreTokenizer - */ + // Sequence using Configs = std::vector; PRETOKENIZER_CONFIG_MEMBER(Configs, pretokenizers) - /*----------------*/ - /* Public methods */ - /*----------------*/ - - /** - * Construct with the type - */ explicit PreTokenizerConfig(std::string type = ""); - - /** - * Construct the pre tokenizer instance from the member data - */ PreTokenizer::Ptr create() const; + PreTokenizerConfig& parse_json(const nlohmann::json& json_config); +}; - /** - * Populate from a json config file - */ - PreTokenizerConfig &parse_json(const nlohmann::json &json_config); - -}; // end class PreTokenizerConfig - -// -- Regex -------------------------------------------------------------------- -// Used for general-purpose single-regex pre tokenization -// CITE: -// https://github.com/huggingface/tokenizers/blob/main/tokenizers/src/pre_tokenizers/split.rs -// -// TODO: Support for "behavior" and "invert" options -// https://github.com/huggingface/tokenizers/blob/main/tokenizers/src/tokenizer/normalizer.rs#L82 -// https://github.com/huggingface/tokenizers/blob/main/tokenizers/src/tokenizer/pattern.rs#L128 +// -- Split (Regex) ------------------------------------------------------------ class RegexPreTokenizer : public PreTokenizer { -public: - /** - * @param pattern: The regex pattern to use for token splitting - * @param is_delimiter: Whether treat `pattern` as delimiter characters, or - * use `pattern` as a regex pattern. - * @param behavior: Split behavior ("MergedWithPrevious" or "Isolated" - * supported) For example: "pre_tokenizer": { "type": "Split", "pattern": { - * "String": " " - * }, - * "behavior": "Isolated", - * "invert": false - * }, - * - * Behavior options: - * - "MergedWithPrevious": Include delimiter with previous token - * Example: "the-final--countdown" -> ["the-", "final-", "-", "countdown"] - * - "Isolated": Keep delimiters as separate tokens - * Example: "the-final--countdown" -> ["the", "-", "final", "-", "-", - * "countdown"] - * - * Notice that the `invert` option is not supported. - */ - explicit RegexPreTokenizer(const std::string &pattern, - bool is_delimiter = false, - const std::string &behavior = "Removed") + public: + explicit RegexPreTokenizer( + const std::string& pattern, + bool is_delimiter = false, + const std::string& behavior = "Removed") : regex_(RegexPreTokenizer::create_regex_(pattern)), - is_delimiter_(is_delimiter), behavior_(behavior) { - if (behavior_.empty() || - (behavior_ != "Removed" && behavior_ != "MergedWithPrevious" && - behavior_ != "Isolated")) { - throw std::runtime_error("Unsupported behavior: " + behavior_); - } - } + is_delimiter_(is_delimiter), + behavior_(behavior) {} - /** Pre-tokenize with the stored regex */ - std::vector pre_tokenize(const std::string &input) const; + std::vector pre_tokenize(const std::string& input) const; -protected: - static std::unique_ptr create_regex_(const std::string &pattern); + protected: + static std::unique_ptr create_regex_(const std::string& pattern); std::unique_ptr regex_; const bool is_delimiter_; const std::string behavior_; - -}; // end class RegexPreTokenizer +}; // -- Digits ------------------------------------------------------------------- -// Used by tokenizers -// CITE: -// https://github.com/huggingface/tokenizers/blob/main/tokenizers/src/pre_tokenizers/digits.rs class DigitsPreTokenizer : public RegexPreTokenizer { -public: + public: explicit DigitsPreTokenizer(bool individual_digits = false) - : RegexPreTokenizer(individual_digits ? R"([^\p{N}]+|\p{N})" - : R"([^\p{N}]+|[\p{N}]+)") {} -}; // end class DigitsPreTokenizer + : RegexPreTokenizer( + individual_digits ? R"([^\p{N}]+|\p{N})" + : R"([^\p{N}]+|[\p{N}]+)") {} +}; // -- ByteLevel ---------------------------------------------------------------- -// Used by tokenizers -// CITE: -// https://github.com/huggingface/tokenizers/blob/main/tokenizers/src/pre_tokenizers/byte_level.rs class ByteLevelPreTokenizer : public PreTokenizer { -public: - /** - * @param add_prefix_space: Whether to add a leading space to the first word - * @param pattern: A user-supplied regex to use for token splitting. If not - * provided, it use the standard GPT2 pattern. - * @param use_regex: Whether to use regex for splitting. If false, only apply - * byte encoding without splitting. - */ - ByteLevelPreTokenizer(bool add_prefix_space = true, - const std::string &pattern = "", bool use_regex = true); - explicit ByteLevelPreTokenizer(const std::string &pattern) + public: + ByteLevelPreTokenizer( + bool add_prefix_space = true, + const std::string& pattern = "", + bool use_regex = true); + explicit ByteLevelPreTokenizer(const std::string& pattern) : ByteLevelPreTokenizer(true, pattern, true) {} - /** Perform pre-tokenization */ - std::vector - pre_tokenize(const std::string &input) const override; + std::vector pre_tokenize( + const std::string& input) const override; -private: + private: const std::string pattern_; const bool add_prefix_space_; const bool use_regex_; - -}; // end class ByteLevelPreTokenizer +}; // -- Sequence ----------------------------------------------------------------- -// Used by tokenizers -// CITE: -// https://github.com/huggingface/tokenizers/blob/main/tokenizers/src/pre_tokenizers/sequence.rs class SequencePreTokenizer : public PreTokenizer { -public: - /** - * @param pre_tokenizers: The sequence of owned pre-tokenizer objects to use - */ + public: explicit SequencePreTokenizer(std::vector pre_tokenizers); - /** Perform pre-tokenization */ - std::vector - pre_tokenize(const std::string &input) const override; + std::vector pre_tokenize( + const std::string& input) const override; -private: + private: const std::vector pre_tokenizers_; - -}; // end class SequencePreTokenizer +}; // -- Bert --------------------------------------------------------------------- -// Used for BERT-style pre-tokenization (splitting on whitespace and -// punctuation) CITE: -// https://github.com/huggingface/tokenizers/blob/main/tokenizers/src/pre_tokenizers/bert.rs class BertPreTokenizer : public PreTokenizer { -public: + public: BertPreTokenizer() = default; - /** Perform BERT pre-tokenization */ - std::vector - pre_tokenize(const std::string &input) const override; + std::vector pre_tokenize( + const std::string& input) const override; +}; + +// -- Metaspace ---------------------------------------------------------------- +// Replaces spaces with a replacement char (default ▁) and optionally splits. +// Used by SentencePiece-based HF tokenizers (T5, ALBERT, XLNet, etc.) + +class MetaspacePreTokenizer : public PreTokenizer { + public: + enum class PrependScheme { Always, First, Never }; + + explicit MetaspacePreTokenizer( + const std::string& replacement = "\xe2\x96\x81", + PrependScheme prepend_scheme = PrependScheme::Always, + bool split = true) + : replacement_(replacement), + prepend_scheme_(prepend_scheme), + split_(split) {} + + std::vector pre_tokenize( + const std::string& input) const override; + + private: + const std::string replacement_; + const PrependScheme prepend_scheme_; + const bool split_; +}; + +// -- Whitespace --------------------------------------------------------------- +// Matches word chars and non-whitespace-non-word chars: \w+|[^\w\s]+ + +class WhitespacePreTokenizer : public RegexPreTokenizer { + public: + WhitespacePreTokenizer() + : RegexPreTokenizer(R"(\w+|[^\w\s]+)") {} +}; + +// -- WhitespaceSplit ---------------------------------------------------------- +// Splits on any whitespace character (removes whitespace). + +class WhitespaceSplitPreTokenizer : public PreTokenizer { + public: + WhitespaceSplitPreTokenizer() = default; + + std::vector pre_tokenize( + const std::string& input) const override; +}; + +// -- Punctuation -------------------------------------------------------------- +// Splits on punctuation characters with configurable behavior (default +// Isolated). + +class PunctuationPreTokenizer : public PreTokenizer { + public: + explicit PunctuationPreTokenizer(const std::string& behavior = "Isolated") + : behavior_(behavior) {} + + std::vector pre_tokenize( + const std::string& input) const override; + + private: + const std::string behavior_; +}; + +// -- CharDelimiterSplit ------------------------------------------------------- +// Splits on a single delimiter character (Removed behavior). + +class CharDelimiterSplitPreTokenizer : public PreTokenizer { + public: + explicit CharDelimiterSplitPreTokenizer(const std::string& delimiter) + : delimiter_(delimiter) {} + + std::vector pre_tokenize( + const std::string& input) const override; + + private: + const std::string delimiter_; +}; + +// -- UnicodeScripts ----------------------------------------------------------- +// Splits on Unicode script boundaries. + +class UnicodeScriptsPreTokenizer : public PreTokenizer { + public: + UnicodeScriptsPreTokenizer() = default; + + std::vector pre_tokenize( + const std::string& input) const override; +}; + +// -- FixedLength -------------------------------------------------------------- +// Splits into chunks of a fixed number of characters. + +class FixedLengthPreTokenizer : public PreTokenizer { + public: + explicit FixedLengthPreTokenizer(size_t length = 5) : length_(length) {} + + std::vector pre_tokenize( + const std::string& input) const override; -}; // end class BertPreTokenizer + private: + const size_t length_; +}; } // namespace tokenizers diff --git a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/re2_regex.h b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/re2_regex.h index 325ca5b922..e3b7391ed7 100644 --- a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/re2_regex.h +++ b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/re2_regex.h @@ -21,7 +21,7 @@ namespace tokenizers { * @brief RE2-based implementation of IRegex. */ class Re2Regex : public IRegex { -public: + public: /** * @brief Construct a RE2 regex. */ @@ -32,14 +32,14 @@ class Re2Regex : public IRegex { * @param pattern The regex pattern to compile. * @return An Error object indicating success or failure of the compilation. */ - virtual Error compile(const std::string &pattern) override; + virtual Error compile(const std::string& pattern) override; /** * @brief Return all non-overlapping matches found in the input string. */ - virtual std::vector find_all(const std::string &text) const override; + virtual std::vector find_all(const std::string& text) const override; -private: + private: std::unique_ptr regex_; }; diff --git a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/regex.h b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/regex.h index 3043690c0f..33a7ade66b 100644 --- a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/regex.h +++ b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/regex.h @@ -18,14 +18,14 @@ namespace tokenizers { struct Match { size_t start; // starting index of the match - size_t end; // ending index of the match (exclusive) + size_t end; // ending index of the match (exclusive) }; /** * @brief Abstract interface for regex wrappers. */ class IRegex { -public: + public: virtual ~IRegex() = default; /** @@ -33,7 +33,7 @@ class IRegex { * @param pattern The regex pattern to compile. * @return An Error object indicating success or failure of the compilation. */ - virtual Error compile(const std::string &pattern) = 0; + virtual Error compile(const std::string& pattern) = 0; /** * @brief Find all non-overlapping matches in the input string. @@ -41,7 +41,7 @@ class IRegex { * @param text The input string to search. * @return A vector of strings containing all matched substrings. */ - virtual std::vector find_all(const std::string &text) const = 0; + virtual std::vector find_all(const std::string& text) const = 0; /** * @brief Escape special regex characters in a string to treat it as literal. @@ -49,12 +49,11 @@ class IRegex { * @param input The input string to escape. * @return The escaped string that can be used as a literal pattern in regex. */ - static std::string escape(const std::string &input); + static std::string escape(const std::string& input); }; // Function pointer type for create_fallback_regex implementations -using FallbackRegexFn = - Result> (*)(const std::string &); +using FallbackRegexFn = Result> (*)(const std::string&); /** * @brief Creates a regex instance. If no strong symbol defined, only @@ -64,7 +63,7 @@ using FallbackRegexFn = * @param pattern The regex pattern to compile. * @return A unique pointer to an IRegex-compatible object. */ -Result> create_regex(const std::string &pattern); +Result> create_regex(const std::string& pattern); bool register_override_fallback_regex(FallbackRegexFn fn); diff --git a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/result.h b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/result.h index c83176da4e..363e1a722e 100644 --- a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/result.h +++ b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/result.h @@ -13,9 +13,9 @@ #pragma once +#include #include #include -#include #include namespace tokenizers { @@ -30,8 +30,9 @@ namespace tokenizers { * void generate() * @endcode */ -template class Result final { -public: +template +class Result final { + public: /// `value_type` member for generic programming. typedef T value_type; @@ -43,17 +44,17 @@ template class Result final { * a non-Ok value. */ /* implicit */ Result(Error error) - : error_(error == Error::Ok ? Error::Internal : error), hasValue_(false) { - } + : error_(error == Error::Ok ? Error::Internal : error), + hasValue_(false) {} /// Value copy constructor. - /* implicit */ Result(const T &val) : value_(val), hasValue_(true) {} + /* implicit */ Result(const T& val) : value_(val), hasValue_(true) {} /// Value move constructor. - /* implicit */ Result(T &&val) : value_(std::move(val)), hasValue_(true) {} + /* implicit */ Result(T&& val) : value_(std::move(val)), hasValue_(true) {} /// Result move constructor. - /* implicit */ Result(Result &&rhs) noexcept : hasValue_(rhs.hasValue_) { + /* implicit */ Result(Result&& rhs) noexcept : hasValue_(rhs.hasValue_) { if (hasValue_) { // Use the value type's move constructor. new (&value_) T(std::move(rhs.value_)); @@ -76,7 +77,9 @@ template class Result final { * If true, it is guaranteed that `error()` will return `Error::Ok`. * If false, it is guaranteed that `error()` will not return `Error::Ok`. */ - bool ok() const { return hasValue_; } + bool ok() const { + return hasValue_; + } /** * Returns the error code of this Result. @@ -98,7 +101,7 @@ template class Result final { * * Only legal to call if `ok()` returns true. */ - T &get() { + T& get() { CheckOk(); return value_; } @@ -108,7 +111,7 @@ template class Result final { * * Only legal to call if `ok()` returns true. */ - const T &get() const { + const T& get() const { CheckOk(); return value_; } @@ -118,29 +121,29 @@ template class Result final { * * Only legal to call if `ok()` returns true. */ - const T &operator*() const &; - T &operator*() &; + const T& operator*() const&; + T& operator*() &; /* * Returns a pointer to the Result's value. * * Only legal to call if `ok()` returns true. */ - const T *operator->() const; - T *operator->(); + const T* operator->() const; + T* operator->(); -private: + private: /** * Delete default constructor since all Results should contain a value or * error. */ Result() = delete; /// Delete copy constructor since T may not be copyable. - Result(const Result &) = delete; + Result(const Result&) = delete; /// Delete copy assignment since T may not be copyable. - Result &operator=(const Result &) = delete; + Result& operator=(const Result&) = delete; /// Delete move assignment since it's not a supported pattern to reuse Result. - Result &operator=(Result &&rhs) = delete; + Result& operator=(Result&& rhs) = delete; // Panics if ok() would return false; void CheckOk() const { @@ -148,7 +151,7 @@ template class Result final { } union { - T value_; // Used if hasValue_ is true. + T value_; // Used if hasValue_ is true. Error error_; // Used if hasValue_ is false. }; @@ -156,22 +159,26 @@ template class Result final { const bool hasValue_; }; -template const T &Result::operator*() const & { +template +const T& Result::operator*() const& { CheckOk(); return value_; } -template T &Result::operator*() & { +template +T& Result::operator*() & { CheckOk(); return value_; } -template const T *Result::operator->() const { +template +const T* Result::operator->() const { CheckOk(); return &value_; } -template T *Result::operator->() { +template +T* Result::operator->() { CheckOk(); return &value_; } @@ -184,14 +191,15 @@ template T *Result::operator->() { * * @param[in] result__ The Result to unwrap */ -#define TK_UNWRAP_THROW(result__) \ - ({ \ - auto unwrap_result__ = (result__); \ - if (!unwrap_result__.ok()) { \ - throw std::runtime_error("Error: " + std::to_string(static_cast( \ - unwrap_result__.error()))); \ - } \ - std::move(unwrap_result__.get()); \ +#define TK_UNWRAP_THROW(result__) \ + ({ \ + auto unwrap_result__ = (result__); \ + if (!unwrap_result__.ok()) { \ + throw std::runtime_error( \ + "Error: " + \ + std::to_string(static_cast(unwrap_result__.error()))); \ + } \ + std::move(unwrap_result__.get()); \ }) /** @@ -207,34 +215,34 @@ template T *Result::operator->() { #define TK_UNWRAP(result__, ...) TK_INTERNAL_UNWRAP(result__, ##__VA_ARGS__) // Internal only: Use TK_UNWRAP() instead. -#define TK_INTERNAL_UNWRAP(...) \ - TK_INTERNAL_UNWRAP_SELECT(__VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1) \ +#define TK_INTERNAL_UNWRAP(...) \ + TK_INTERNAL_UNWRAP_SELECT(__VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1) \ (__VA_ARGS__) // Internal only: Use TK_UNWRAP() instead. -#define TK_INTERNAL_UNWRAP_SELECT(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, N, \ - ...) \ +#define TK_INTERNAL_UNWRAP_SELECT( \ + _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, N, ...) \ TK_INTERNAL_UNWRAP_##N // Internal only: Use TK_UNWRAP() instead. -#define TK_INTERNAL_UNWRAP_1(result__) \ - ({ \ - auto et_result__ = (result__); \ - if (!et_result__.ok()) { \ - return et_result__.error(); \ - } \ - std::move(*et_result__); \ +#define TK_INTERNAL_UNWRAP_1(result__) \ + ({ \ + auto et_result__ = (result__); \ + if (!et_result__.ok()) { \ + return et_result__.error(); \ + } \ + std::move(*et_result__); \ }) // Internal only: Use TK_UNWRAP() instead. -#define TK_INTERNAL_UNWRAP_2(result__, message__, ...) \ - ({ \ - auto et_result__ = (result__); \ - if (!et_result__.ok()) { \ - TK_LOG(Error, message__, ##__VA_ARGS__); \ - return et_result__.error(); \ - } \ - std::move(*et_result__); \ +#define TK_INTERNAL_UNWRAP_2(result__, message__, ...) \ + ({ \ + auto et_result__ = (result__); \ + if (!et_result__.ok()) { \ + TK_LOG(Error, message__, ##__VA_ARGS__); \ + return et_result__.error(); \ + } \ + std::move(*et_result__); \ }) // Internal only: Use TK_UNWRAP() instead. diff --git a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/sentencepiece.h b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/sentencepiece.h index 8fd682cfde..f75370489c 100644 --- a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/sentencepiece.h +++ b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/sentencepiece.h @@ -7,33 +7,51 @@ */ // @lint-ignore-every LICENSELINT -// A tokenizer that works with sentencepiece. Used by Llama2. #pragma once -#include "sentencepiece_processor.h" -#include #include +#include #include + +// Forward-declare to avoid leaking sentencepiece headers to consumers. +namespace sentencepiece { +class SentencePieceProcessor; +} // namespace sentencepiece + namespace tokenizers { +/** + * Tokenizer backed by Google's SentencePiece library. + * + * Loads `.model` (protobuf) files produced by the SentencePiece trainer. + * Used by Llama 2/3, Gemma, T5, ALBERT, XLNet, Mistral, and many others. + */ class SPTokenizer : public Tokenizer { -public: + public: explicit SPTokenizer(); ~SPTokenizer() override; - Error load(const std::string &tokenizer_path) override; + Error load(const std::string& tokenizer_path) override; Result id_to_piece(uint64_t token) const override; - Result piece_to_id(const std::string &text) const override; + Result piece_to_id(const std::string& text) const override; + + Result> + encode(const std::string& input, int8_t bos, int8_t eos) const override; - Result> encode(const std::string &input, int8_t bos, - int8_t eos) const override; + /// Streaming single-token decode (for incremental inference). + Result decode( + uint64_t prev_token, + uint64_t token, + bool skip_special_tokens = false) const override; - Result decode(uint64_t prev_token, uint64_t token, - bool skip_special_tokens = false) const override; + /// Batch decode: converts a full token sequence back to text. + Result decode( + const std::vector& tokens, + bool skip_special_tokens = false) const; -private: - std::unique_ptr _processor; + private: + std::unique_ptr processor_; }; } // namespace tokenizers diff --git a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/sentencepiece_processor.h b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/sentencepiece_processor.h deleted file mode 100644 index da25aee84f..0000000000 --- a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/sentencepiece_processor.h +++ /dev/null @@ -1,757 +0,0 @@ -// Copyright 2016 Google Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License.! - -#ifndef SENTENCEPIECE_PROCESSOR_H_ -#define SENTENCEPIECE_PROCESSOR_H_ - -#include -#include -#include -#include -#include -#include - -#ifndef SWIG -namespace absl { -using std::string_view; -} // namespace absl -#endif // SWIG - -namespace sentencepiece { -namespace util { - -enum class StatusCode : int { - kOk = 0, - kCancelled = 1, - kUnknown = 2, - kInvalidArgument = 3, - kDeadlineExceeded = 4, - kNotFound = 5, - kAlreadyExists = 6, - kPermissionDenied = 7, - kResourceExhausted = 8, - kFailedPrecondition = 9, - kAborted = 10, - kOutOfRange = 11, - kUnimplemented = 12, - kInternal = 13, - kUnavailable = 14, - kDataLoss = 15, - kUnauthenticated = 16, -}; - -class Status { -public: - Status(); - ~Status(); - Status(StatusCode code, absl::string_view error_message); - Status(const Status &s); - void operator=(const Status &s); - bool operator==(const Status &s) const; - bool operator!=(const Status &s) const; - inline bool ok() const { return rep_ == nullptr; } - - void set_error_message(const char *str); - const char *error_message() const; - const char *message() const { return error_message(); } - StatusCode code() const; - std::string ToString() const; - - void IgnoreError(); - -private: - struct Rep; - std::unique_ptr rep_; -}; -} // namespace util - -// SentencePieceProcessor: -// Simple and language independent tokenizer and de-tokenizer for -// Neural Network Machine Translation. -// -// SentencePieceProcessor provides Encode() and Decode() methods, -// which correspond to tokenization and de-tokenization respectively. -// -// - Encode: -// Given a raw source sentence, encode it into a sequence -// of pieces or vocabulary ids. -// -// - Decode: -// Given a sequence of pieces or vocabulary ids, decode it -// into a de-tokenized raw sentence. -// -// SentencePieceProcessor provides a lossless data conversion -// that allows the original raw sentence to be perfectly reconstructed -// from the encoded data, i.e., Decode(Encode(input)) == input. -// This characteristics is useful, as we can make the de-tokenization -// completely language independent. -// -// Usage: -// SentencePieceProcessor sp; -// sp.Load("//path/to/model"); -// -// vector sps; -// sp.Encode("hello world.", &sps).IgnoreError(); -// -// vector ids; -// sp.Encode("hello world.", &ids).IgnoreError(); -// -// string detok; -// sp.Decode(sps, &detok); -// CHECK_EQ("hello world.", detok).IgnoreError(); -// -// sp.Decode(ids, &detok); -// CHECK_EQ("hello world.", detok).IgnoreError(); -// -// We can also use SentencePieceText which manages the byte-offsets -// between user input (output) and internal sentence pieces. -// -// SentencePieceText spt; -// sp.Encode("hello world.", &spt); -// // Emits the byte range of each piece. -// for (const auto &piece : spt.pieces()) { -// LOG(INFO) << piece.begin() << " " << piece.end(); -// } -// -// sp.Decode({0, 1, 2, 3..}, &spt); -// for (const auto &piece : spt.pieces()) { -// LOG(INFO) << piece.begin() << " " << piece.end(); -// } -// - -class NBestSentencePieceText; -class ModelInterface; -class SentencePieceText; -class ModelProto; -class NormalizerSpec; - -namespace normalizer { -class Normalizer; -} // namespace normalizer - -#ifndef SWIGGO -namespace util { -// Redefine std::string for serialized_proto interface as Python's string is -// a Unicode string. We can enforce the return value to be raw byte sequence -// with SWIG's typemap. -using bytes = std::string; -} // namespace util -#endif // SWIGGO - -class NBestSentencePieceText; -class ModelInterface; -class SentencePieceText; -class SentencePieceText_SentencePiece; - -// Wrapper class of SentencePieceText -// This wrapper only allows an immutable access to the proto and -// hides the actual implementation of protobuf. -// See sentencepiece.proto for the details of this class. -class ImmutableSentencePieceText_ImmutableSentencePiece { -public: - ImmutableSentencePieceText_ImmutableSentencePiece(); - ~ImmutableSentencePieceText_ImmutableSentencePiece() = default; - - const std::string &piece() const; - const std::string &surface() const; - uint32_t id() const; - uint32_t begin() const; - uint32_t end() const; - - friend class ImmutableSentencePieceText; - -private: - explicit ImmutableSentencePieceText_ImmutableSentencePiece( - const SentencePieceText_SentencePiece &sp); - const SentencePieceText_SentencePiece *sp_ = nullptr; -}; - -class ImmutableSentencePieceText { -public: - ImmutableSentencePieceText(); - virtual ~ImmutableSentencePieceText(); - - std::vector pieces() const; - - size_t pieces_size() const; - ImmutableSentencePieceText_ImmutableSentencePiece pieces(int index) const; - - const std::string &text() const; - float score() const; - - util::bytes SerializeAsString() const; - - // Returns the actual mutable proto. - // Do not use this outside of SentencePieceProcessor, as - // it returns the raw pointer managed by the shared_ptr. - SentencePieceText *mutable_proto(); - - // Converts the utf8 byte spans into Unicode char span. - void ConvertToUnicodeSpans(); - - friend class ImmutableNBestSentencePieceText; - -private: - explicit ImmutableSentencePieceText(const SentencePieceText &spt); - const SentencePieceText *spt_ = nullptr; - std::shared_ptr rep_; -}; - -// Wrapper class of SentencePieceText -// This wrapper only allows an immutable access to the proto and -// hides the actual implementation of protobuf. -// See sentencepiece.proto for the details of this class. -class ImmutableNBestSentencePieceText { -public: - ImmutableNBestSentencePieceText(); - virtual ~ImmutableNBestSentencePieceText(); - - std::vector nbests() const; - - size_t nbests_size() const; - ImmutableSentencePieceText nbests(int index) const; - - util::bytes SerializeAsString() const; - - // Returns the actual mutable proto. - // Do not use this outside of SentencePieceProcessor, as - // it returns the raw pointer managed by the shared_ptr. - NBestSentencePieceText *mutable_proto(); - - void ConvertToUnicodeSpans(); - -private: - std::shared_ptr rep_; -}; - -class SentencePieceProcessor { -public: - SentencePieceProcessor(); - virtual ~SentencePieceProcessor(); - - // Loads model from `filename`. - // Returns false if `filename` cannot be loaded. - virtual util::Status Load(absl::string_view filename); - - // Loads model from `filename`. - // Crash if `filename` cannot be loaded. - virtual void LoadOrDie(absl::string_view filename); - - // Loads model from `model_proto`. - // `model_proto` is copied. - virtual util::Status Load(const ModelProto &model_proto); - - // Loads model from `model_proto`. - // `model_proto` is moved. - virtual util::Status Load(std::unique_ptr model_proto); - - // Loads model from `serialized`, which is a string-serialized model proto. - // Useful to load the model from a platform independent blob object. - virtual util::Status LoadFromSerializedProto(absl::string_view serialized); - - // Returns the status. Encode/Decode methods are valid when status is OK. - virtual util::Status status() const; - - // Sets encode extra_option sequence. - virtual util::Status SetEncodeExtraOptions(absl::string_view extra_option); - - // Sets decode extra_option sequence. - virtual util::Status SetDecodeExtraOptions(absl::string_view extra_option); - - ////////////////////////////////////////////////////////////// - // Vocabulary restriction. - // Background: - // https://github.com/rsennrich/subword-nmt#best-practice-advice-for-byte-pair-encoding-in-nmt - - // Restricts the vocabulary set. - // The input sentences are encoded into the tokens in `valid_vocab`. - virtual util::Status - SetVocabulary(const std::vector &valid_vocab); - - // Reverts the vocabulary restriction. - virtual util::Status ResetVocabulary(); - - // Loads the valid vocabulary set from `filename` in TSV format. - // Format: . - // Any token with frequency < threshold will be treated as OOV. - virtual util::Status LoadVocabulary(absl::string_view filename, - int threshold); - - ////////////////////////////////////////////////////////////// - // Simple Encode and Decode API. - // - // Given a UTF8 input, encodes it into a sequence of sentence pieces. - virtual util::Status Encode(absl::string_view input, - std::vector *pieces) const; - - // Given a UTF8 input, encodes it into a sequence of ids. - virtual util::Status Encode(absl::string_view input, - std::vector *ids) const; - - // Given a sequence of pieces, decodes it into a detokenized output. - virtual util::Status Decode(const std::vector &pieces, - std::string *detokenized) const; - - // Given a sequence of pieces, decodes it into a detokenized output. - virtual util::Status Decode(const std::vector &pieces, - std::string *detokenized) const; - - // Given a sequence of ids, decodes it into a detokenized output. - virtual util::Status Decode(const std::vector &ids, - std::string *detokenized) const; - - ////////////////////////////////////////////////////////////// - // NBest API. - // - // Same as Encode, but returns nbest results. - virtual util::Status - NBestEncode(absl::string_view input, int nbest_size, - std::vector> *pieces) const; - - // Same as Encode, but returns nbest results. - virtual util::Status NBestEncode(absl::string_view input, int nbest_size, - std::vector> *ids) const; - - ////////////////////////////////////////////////////////////// - // Sampling API. - // - // Unigram and BPE support sampling mode. - // - Unigram (--model_type=unigram): - // `nbest_size`: When `nbest_size` is positive value, approximately samples - // one segmentation from nbest candidates. When `nbest_size` is negative - // value, samples one segmentation from the hypotheses (Lattice) according to - // the generation probabilities using forward-filtering and backward-sampling - // algorithm. - // `alpha`: Smoothing parameter (inverse temperature). The best segmentation - // (Viterbi segmentation) is more likely sampled when setting larger alpha. - // When alpha is 0.0, one segmentation is uniformly sampled from the nbest or - // lattice. `nbest_size` and `alpha` correspond to parameters `l` and `alpha` - // in https://arxiv.org/abs/1804.10959 (nbest_size < 0 means l = infinity) - // - // - BPE (--model_type=bpe): - // `alpha`: The dropout probability `p` of bpe merge operations in - // https://arxiv.org/abs/1910.13267 Nbest-based sampling is not supported so - // nbest_size parameter is ignored in BPE. - virtual util::Status SampleEncode(absl::string_view input, int nbest_size, - float alpha, - std::vector *pieces) const; - - // Same as above, but returns a sequence of ids. - virtual util::Status SampleEncode(absl::string_view input, int nbest_size, - float alpha, std::vector *ids) const; - - ////////////////////////////////////////////////////////////// - // SampleEncodeAndScore API. - // - // Sample `samples` many tokenisations from the segmentation lattice. - // These methods are only available in model_type=unigram. - // - // `alpha`: smoothing parameter (inverse temperature). The same as `alpha` in - // `Sample` method. - // 'wor`: If `wor` is true, the samples are taken without replacement, and the - // scores are the inclusion probabilities of the elements in the sample; - // otherwise the samples are taken with replacement and the scores are the - // log-probs of sample elements - // `include_best`: If `include_best` is true, the best tokenisation is always - // included in the sample, and the remaining elements are sampled excluding - // the best. - virtual util::Status SampleEncodeAndScore( - absl::string_view input, int num_samples, float alpha, bool wor, - bool include_best, - std::vector, float>> *pieces) const; - - // Same as above, but returns a sequence of ids. - virtual util::Status SampleEncodeAndScore( - absl::string_view input, int num_samples, float alpha, bool wor, - bool include_best, - std::vector, float>> *ids) const; - - ////////////////////////////////////////////////////////////// - // Entropy API. - // - // This only available in model_type=unigram. - // Calculate entropy of possible tokenisations - virtual util::Status CalculateEntropy(absl::string_view input, float alpha, - float *entropy) const; - - ////////////////////////////////////////////////////////////// - // Advanced API returning SentencePieceText, which manages - // utf8-byte alignments between user-input/detokenized text - // and internal sentencepiece sequence. - // - // Given a UTF8 input, encodes it into SentencePieceText. - // - // When using these APIs, sentencepiece.pb.h header files must be included. - // We can also use ImutableSentencePieceText as follows. - // - // ImmutableSentencePieceText spt; - // Encode("hello", spt.mutable_proto()).IgnoreError(); - // std::cout << spt.pieces_size() << std::endl; - virtual util::Status Encode(absl::string_view input, - SentencePieceText *spt) const; - - virtual util::Status NBestEncode(absl::string_view input, int nbest_size, - NBestSentencePieceText *nbest_spt) const; - - virtual util::Status SampleEncode(absl::string_view input, int nbest_size, - float alpha, SentencePieceText *spt) const; - - virtual util::Status - SampleEncodeAndScore(absl::string_view input, int num_samples, float alpha, - bool wor, bool include_best, - NBestSentencePieceText *samples_spt) const; - - // DEPRECATED: Remove this API and use std::vector - virtual util::Status Decode(const std::vector &pieces, - SentencePieceText *spt) const; - - virtual util::Status Decode(const std::vector &pieces, - SentencePieceText *spt) const; - - virtual util::Status Decode(const std::vector &ids, - SentencePieceText *spt) const; -#ifdef SWIG -#define SPP_SWIG_CHECK_AND_THROW \ - if (!status.ok()) \ - throw status; -#else -#define SPP_SWIG_CHECK_AND_THROW \ - if (!status.ok()) { \ - } -#endif // SWIG - -#define DEFINE_SPP_DIRECT_FUNC_IMPL(FuncName, OutType, ...) \ - OutType output; \ - const auto status = FuncName(__VA_ARGS__, &output); \ - SPP_SWIG_CHECK_AND_THROW; \ - return output; - -#define DEFINE_SPP_SERIALIZED_PROTO_IMPL(FuncName, OutType, ...) \ - OutType output; \ - const auto status = FuncName(__VA_ARGS__, output.mutable_proto()); \ - SPP_SWIG_CHECK_AND_THROW; \ - return output.SerializeAsString(); - -#define DEFINE_SPP_IMMUTABLE_PROTO_IMPL(FuncName, OutType, ...) \ - OutType output; \ - const auto status = FuncName(__VA_ARGS__, output.mutable_proto()); \ - SPP_SWIG_CHECK_AND_THROW; \ - return output; - - ////////////////////////////////////////////////////////////// - // Handy methods that return the result directly. - // These functions ignore internal errors. - virtual std::vector - EncodeAsPieces(absl::string_view input) const { - DEFINE_SPP_DIRECT_FUNC_IMPL(Encode, std::vector, input); - } - - virtual std::vector EncodeAsIds(absl::string_view input) const { - DEFINE_SPP_DIRECT_FUNC_IMPL(Encode, std::vector, input); - } - - virtual std::vector> - NBestEncodeAsPieces(absl::string_view input, int nbest_size) const { - DEFINE_SPP_DIRECT_FUNC_IMPL( - NBestEncode, std::vector>, input, nbest_size); - } - - virtual std::vector> - NBestEncodeAsIds(absl::string_view input, int nbest_size) const { - DEFINE_SPP_DIRECT_FUNC_IMPL(NBestEncode, std::vector>, - input, nbest_size); - } - - virtual std::vector SampleEncodeAsPieces(absl::string_view input, - int nbest_size, - float alpha) const { - DEFINE_SPP_DIRECT_FUNC_IMPL(SampleEncode, std::vector, input, - nbest_size, alpha); - } - - virtual std::vector SampleEncodeAsIds(absl::string_view input, - int nbest_size, - float alpha) const { - DEFINE_SPP_DIRECT_FUNC_IMPL(SampleEncode, std::vector, input, - nbest_size, alpha); - } - - virtual std::vector, float>> - SampleEncodeAndScoreAsPieces(absl::string_view input, int num_samples, - float alpha, bool wor, bool include_best) const { - using _T = std::vector, float>>; - DEFINE_SPP_DIRECT_FUNC_IMPL(SampleEncodeAndScore, _T, input, num_samples, - alpha, wor, include_best); - } - - virtual std::vector, float>> - SampleEncodeAndScoreAsIds(absl::string_view input, int num_samples, - float alpha, bool wor, bool include_best) const { - using _T = std::vector, float>>; - DEFINE_SPP_DIRECT_FUNC_IMPL(SampleEncodeAndScore, _T, input, num_samples, - alpha, wor, include_best); - } - - // DEPRECATED: Remove this API and use std::vector - virtual std::string - DecodePieces(const std::vector &pieces) const { - DEFINE_SPP_DIRECT_FUNC_IMPL(Decode, std::string, pieces); - } - - virtual std::string - DecodePieces(const std::vector &pieces) const { - DEFINE_SPP_DIRECT_FUNC_IMPL(Decode, std::string, pieces); - } - - virtual std::string DecodeIds(const std::vector &ids) const { - DEFINE_SPP_DIRECT_FUNC_IMPL(Decode, std::string, ids); - } - - virtual float CalculateEntropy(absl::string_view text, float alpha) const { - DEFINE_SPP_DIRECT_FUNC_IMPL(CalculateEntropy, float, text, alpha); - } - - ////////////////////////////////////////////////////////////// - // SerializedProto API. (DEPRECATED). Use ImmutableProto API. - // They are used in Python interface. Returns serialized proto. - // In python module, we can get access to the full Proto after - // deserialzing the returned byte sequence. - virtual util::bytes EncodeAsSerializedProto(absl::string_view input) const { - DEFINE_SPP_SERIALIZED_PROTO_IMPL(Encode, ImmutableSentencePieceText, input); - } - - virtual util::bytes SampleEncodeAsSerializedProto(absl::string_view input, - int nbest_size, - float alpha) const { - DEFINE_SPP_SERIALIZED_PROTO_IMPL(SampleEncode, ImmutableSentencePieceText, - input, nbest_size, alpha); - } - - virtual util::bytes NBestEncodeAsSerializedProto(absl::string_view input, - int nbest_size) const { - DEFINE_SPP_SERIALIZED_PROTO_IMPL( - NBestEncode, ImmutableNBestSentencePieceText, input, nbest_size); - } - - virtual util::bytes - SampleEncodeAndScoreAsSerializedProto(absl::string_view input, - int num_samples, float alpha, bool wor, - bool include_best) const { - DEFINE_SPP_SERIALIZED_PROTO_IMPL(SampleEncodeAndScore, - ImmutableNBestSentencePieceText, input, - num_samples, alpha, wor, include_best); - } - - // TODO(taku): Remove this API and use std::vector - virtual util::bytes - DecodePiecesAsSerializedProto(const std::vector &pieces) const { - DEFINE_SPP_SERIALIZED_PROTO_IMPL(Decode, ImmutableSentencePieceText, - pieces); - } - - virtual util::bytes DecodePiecesAsSerializedProto( - const std::vector &pieces) const { - DEFINE_SPP_SERIALIZED_PROTO_IMPL(Decode, ImmutableSentencePieceText, - pieces); - } - - virtual util::bytes - DecodeIdsAsSerializedProto(const std::vector &ids) const { - DEFINE_SPP_SERIALIZED_PROTO_IMPL(Decode, ImmutableSentencePieceText, ids); - } - - ////////////////////////////////////////////////////////////// - // ImmutableProto API. - virtual ImmutableSentencePieceText - EncodeAsImmutableProto(absl::string_view input) const { - DEFINE_SPP_IMMUTABLE_PROTO_IMPL(Encode, ImmutableSentencePieceText, input); - } - - virtual ImmutableSentencePieceText - SampleEncodeAsImmutableProto(absl::string_view input, int nbest_size, - float alpha) const { - DEFINE_SPP_IMMUTABLE_PROTO_IMPL(SampleEncode, ImmutableSentencePieceText, - input, nbest_size, alpha); - } - - virtual ImmutableNBestSentencePieceText - NBestEncodeAsImmutableProto(absl::string_view input, int nbest_size) const { - DEFINE_SPP_IMMUTABLE_PROTO_IMPL( - NBestEncode, ImmutableNBestSentencePieceText, input, nbest_size); - } - - virtual ImmutableNBestSentencePieceText - SampleEncodeAndScoreAsImmutableProto(absl::string_view input, int num_samples, - float alpha, bool wor, - bool include_best) const { - DEFINE_SPP_IMMUTABLE_PROTO_IMPL(SampleEncodeAndScore, - ImmutableNBestSentencePieceText, input, - num_samples, alpha, wor, include_best); - } - - // TODO(taku): Remove this API and use std::vector - virtual ImmutableSentencePieceText - DecodePiecesAsImmutableProto(const std::vector &pieces) const { - DEFINE_SPP_IMMUTABLE_PROTO_IMPL(Decode, ImmutableSentencePieceText, pieces); - } - - virtual ImmutableSentencePieceText DecodePiecesAsImmutableProto( - const std::vector &pieces) const { - DEFINE_SPP_IMMUTABLE_PROTO_IMPL(Decode, ImmutableSentencePieceText, pieces); - } - - virtual ImmutableSentencePieceText - DecodeIdsAsImmutableProto(const std::vector &ids) const { - DEFINE_SPP_IMMUTABLE_PROTO_IMPL(Decode, ImmutableSentencePieceText, ids); - } - -#undef DEFINE_SPP_DIRECT_FUNC_IMPL -#undef DEFINE_SPP_SERIALIZED_PROTO_IMPL -#undef DEFINE_SPP_IMMUTABLE_PROTO_IMPL - - ////////////////////////////////////////////////////////////// - // Normalization methods. - - // Normalize `input`. - virtual util::Status Normalize(absl::string_view input, - std::string *normalized) const; - - // Normalize `input`. Stores the utf8-byte offset from - // the normalized string to the original input. - virtual util::Status Normalize(absl::string_view input, - std::string *normalized, - std::vector *norm_to_orig) const; - - virtual std::string Normalize(absl::string_view input) const; - - ////////////////////////////////////////////////////////////// - // Vocabulary management methods. - // - // Returns the size of sentence pieces, which is the same as - // the size of vocabulary for NMT. - virtual int GetPieceSize() const; - - // Returns the vocab id of `piece`. - // Returns UNK(0) if `piece` is unknown. - virtual int PieceToId(absl::string_view piece) const; - - // Returns the string representation of vocab with `id`. - virtual const std::string &IdToPiece(int id) const; - - // Returns the score of `id`. - // Usually score is an emission log probability of unigram language - // model. - virtual float GetScore(int id) const; - - // Returns true if `id` is unknown symbol. - virtual bool IsUnknown(int id) const; - - // Returns true if `id` is control symbol. - virtual bool IsControl(int id) const; - - // Returns true if `id` is unused symbol. - virtual bool IsUnused(int id) const; - - // Returns true if `id` is byte symbol. - virtual bool IsByte(int id) const; - - // Returns the reserved id. - // Returns -1 if not defined. - - // Returns unknown () id. - virtual int unk_id() const; - - // Returns BOS () id. - virtual int bos_id() const; - - // Returns EOS () id. - virtual int eos_id() const; - - // Returns PAD () id. - virtual int pad_id() const; - - ////////////////////////////////////////////////////////////// - // Model management. - // - // Allows injection of a mock model instance. `model` is moved. - void SetModel(std::unique_ptr &&model); - - // Allows injection of a normalizer instance. `normalizer` is moved. - void SetNormalizer(std::unique_ptr &&normalizer); - - // Returns immutable model proto. Useful to obtain extended - // or experimental parameters encoded in model_proto. - const ModelProto &model_proto() const; - - // returns immutable model proto as std::string. - // Useful to save the state of this instance via Python's pickle object. - util::bytes serialized_model_proto() const; - - // Returns mutable normalizer_spec. - // Updating the intenral normalization during the encoding/decoding are not - // recommended and may result in unexpected behavior. Use at your own risk. - NormalizerSpec *mutable_normalizer_spec() const; - -private: - enum ExtraOption { REVERSE, BOS, EOS, UNK_PIECE }; - - util::Status ParseExtraOptions(absl::string_view extra_option, - std::vector *extra_options) const; - - util::Status ApplyExtraOptions(const std::vector &extra_options, - SentencePieceText *spt) const; - - util::Status PopulateSentencePieceText( - absl::string_view input, absl::string_view normalized, - const std::vector &norm_to_orig, - const std::vector> &result, - SentencePieceText *spt) const; - - std::unique_ptr model_; - std::unique_ptr normalizer_; - std::unique_ptr denormalizer_; - - // Underlying model protocol buffer. The same lifetime as model_. - std::unique_ptr model_proto_; - - std::vector encode_extra_options_; - std::vector decode_extra_options_; -}; - -// Set seed value of random generator. -// Do not set static_cast(-1), -// as this seed is reserved for initializing from -// std::random_device. -void SetRandomGeneratorSeed(unsigned int seed); - -// Set the global log level. The default loglevel is 0. -// The log is emitted only when min_log_level >= output_log_level. -void SetMinLogLevel(int v); - -// IO related functions to absorb model formats. -namespace io { -// Loads `model_proto` from `filename`. -// We can instantiate SentencePieceProcessor as follows: -// -// auto model_proto = absl::make_unique(); -// io::LoadModelProto("//path/spm.model", model_proto.get()); -// SentencePieceProcessor sp; -// CHECK_OK(sp.Load(std::move(model_proto))); -util::Status LoadModelProto(absl::string_view, ModelProto *model_proto); - -// Saves `model_proto` as `filename`. -util::Status SaveModelProto(absl::string_view, const ModelProto &model_proto); -} // namespace io -} // namespace sentencepiece -#endif // SENTENCEPIECE_PROCESSOR_H_ diff --git a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/std_regex.h b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/std_regex.h index f8a48f24c9..76910c329d 100644 --- a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/std_regex.h +++ b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/std_regex.h @@ -8,10 +8,10 @@ #pragma once -#include "regex.h" #include #include #include +#include "regex.h" namespace tokenizers { @@ -19,7 +19,7 @@ namespace tokenizers { * @brief std::regex-based implementation of IRegex. */ class StdRegex : public IRegex { -public: + public: /** * @brief Construct a std::regex wrapper. */ @@ -30,14 +30,14 @@ class StdRegex : public IRegex { * @param pattern The regex pattern to compile. * @return An Error object indicating success or failure of the compilation. */ - virtual Error compile(const std::string &pattern) override; + virtual Error compile(const std::string& pattern) override; /** * @brief Find all non-overlapping matches in the input string. */ - virtual std::vector find_all(const std::string &text) const override; + virtual std::vector find_all(const std::string& text) const override; -private: + private: std::regex regex_; }; diff --git a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/string_integer_map.h b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/string_integer_map.h index 7857abf946..e27289d9f7 100644 --- a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/string_integer_map.h +++ b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/string_integer_map.h @@ -35,11 +35,12 @@ namespace detail { * data being stored. Custom hash functions are supported, with a stateful hash * functor being optionally provided at construction time. */ -template , - typename TIntegerHash = std::hash, - typename TAllocator = std::allocator> +template < + typename TStringHash = std::hash, + typename TIntegerHash = std::hash, + typename TAllocator = std::allocator> class StringIntegerMap { -public: + public: /// @name Constructors /// @{ @@ -52,7 +53,8 @@ class StringIntegerMap { * string and integer in the map must be unique. * @param map map of strings to integers */ - template explicit StringIntegerMap(const TMap &map); + template + explicit StringIntegerMap(const TMap& map); /** * Construct a StringIntegerMap from a map of strings to integers, explicitly @@ -61,8 +63,10 @@ class StringIntegerMap { * @param map map of strings to integers */ template - StringIntegerMap(const TMap &map, TStringHash string_hasher, - TIntegerHash integer_hasher); + StringIntegerMap( + const TMap& map, + TStringHash string_hasher, + TIntegerHash integer_hasher); /// @} /// @name Accessors @@ -94,14 +98,15 @@ class StringIntegerMap { * Retrieves the element in the map at the given index. * @return A pair containing the string and integer at the given index. */ - std::pair - getElement(std::size_t index) const; + std::pair getElement( + std::size_t index) const; /// @} -private: - template class VariableSizedInteger { - public: + private: + template + class VariableSizedInteger { + public: VariableSizedInteger() = default; explicit VariableSizedInteger(TLogical max_value) { @@ -113,29 +118,33 @@ class StringIntegerMap { mask_ = (TLogical(1) << (byte_count_ * 8)) - TLogical(1); } - std::size_t getByteCount() const { return byte_count_; } + std::size_t getByteCount() const { + return byte_count_; + } - TLogical getMask() const { return mask_; } + TLogical getMask() const { + return mask_; + } - std::uint8_t *write(std::uint8_t *target, TLogical value) const { + std::uint8_t* write(std::uint8_t* target, TLogical value) const { std::memcpy(target, &value, byte_count_); return target + byte_count_; } - TLogical read(const std::uint8_t *source) const { + TLogical read(const std::uint8_t* source) const { TLogical value; std::memcpy(&value, source, sizeof(TLogical)); return value & mask_; } - private: + private: std::size_t byte_count_ = 0; TLogical mask_ = 0; }; - bool tryGetInteger(std::string_view str, std::uint64_t &result) const; + bool tryGetInteger(std::string_view str, std::uint64_t& result) const; - bool tryGetString(std::uint64_t integer, std::string_view &result) const; + bool tryGetString(std::uint64_t integer, std::string_view& result) const; std::size_t getBucketIndex(std::string_view value) const; @@ -197,13 +206,15 @@ class StringIntegerMap { template template StringIntegerMap::StringIntegerMap( - const TMap &map) + const TMap& map) : StringIntegerMap(map, TStringHash(), TIntegerHash()) {} template template StringIntegerMap::StringIntegerMap( - const TMap &map, TStringHash string_hasher, TIntegerHash integer_hasher) + const TMap& map, + TStringHash string_hasher, + TIntegerHash integer_hasher) : string_hasher_(string_hasher), integer_hasher_(integer_hasher) { assert(map.size() <= std::numeric_limits::max()); bucket_count_ = size_ = map.size(); @@ -226,7 +237,7 @@ StringIntegerMap::StringIntegerMap( std::uint64_t largest_integer = 0; std::size_t total_string_size = 0; - for (const auto &[str, integer] : map) { + for (const auto& [str, integer] : map) { total_string_size += str.size(); largest_string_size = std::max(largest_string_size, str.size()); largest_integer = std::max(largest_integer, integer); @@ -237,15 +248,19 @@ StringIntegerMap::StringIntegerMap( integer_ = VariableSizedInteger(largest_integer); string_size_ = VariableSizedInteger(largest_string_size); - string_offset_ = VariableSizedInteger(total_string_size); const auto string_element_data_size = ((integer_.getByteCount() + string_size_.getByteCount() + 1) * map.size()) + total_string_size; + + // string_offset_ stores byte offsets into string_element_data_, which + // includes per-element headers (integer + string_size + small_hash bytes) + // on top of the raw string data. Must be sized against the full buffer, + // not just total_string_size, to avoid truncating offsets > 255. + string_offset_ = VariableSizedInteger(string_element_data_size); const auto integer_element_size = integer_.getByteCount() + - string_offset_.getByteCount() + - string_size_.getByteCount(); + string_offset_.getByteCount() + string_size_.getByteCount(); const auto integer_element_data_size = integer_element_size * map.size(); element_offset_ = VariableSizedInteger( @@ -262,41 +277,45 @@ StringIntegerMap::StringIntegerMap( // Set up terminal bucket indices. // - element_offset_.write(string_bucket_data_.data() + - (bucket_count_ * element_offset_.getByteCount()), - string_element_data_size); - element_offset_.write(integer_bucket_data_.data() + - (bucket_count_ * element_offset_.getByteCount()), - integer_element_data_size); + element_offset_.write( + string_bucket_data_.data() + + (bucket_count_ * element_offset_.getByteCount()), + string_element_data_size); + element_offset_.write( + integer_bucket_data_.data() + + (bucket_count_ * element_offset_.getByteCount()), + integer_element_data_size); // // Sort the builder elements. // - std::sort(std::begin(builder_string_elements), - std::end(builder_string_elements), - [this](const BuilderElement &first, const BuilderElement &second) { - const auto first_bucket = first.hash % bucket_count_; - const auto second_bucket = second.hash % bucket_count_; - if (first_bucket == second_bucket) { - const auto first_small_hash = getSmallHash(first.hash); - const auto second_small_hash = getSmallHash(second.hash); - return first_small_hash < second_small_hash; - } - - return first_bucket < second_bucket; - }); - - std::sort(std::begin(builder_integer_elements), - std::end(builder_integer_elements), - [this](const BuilderElement &first, const BuilderElement &second) { - const auto first_bucket = first.hash % bucket_count_; - const auto second_bucket = second.hash % bucket_count_; - if (first_bucket == second_bucket) { - return first.integer < second.integer; - } - - return first_bucket < second_bucket; - }); + std::sort( + std::begin(builder_string_elements), + std::end(builder_string_elements), + [this](const BuilderElement& first, const BuilderElement& second) { + const auto first_bucket = first.hash % bucket_count_; + const auto second_bucket = second.hash % bucket_count_; + if (first_bucket == second_bucket) { + const auto first_small_hash = getSmallHash(first.hash); + const auto second_small_hash = getSmallHash(second.hash); + return first_small_hash < second_small_hash; + } + + return first_bucket < second_bucket; + }); + + std::sort( + std::begin(builder_integer_elements), + std::end(builder_integer_elements), + [this](const BuilderElement& first, const BuilderElement& second) { + const auto first_bucket = first.hash % bucket_count_; + const auto second_bucket = second.hash % bucket_count_; + if (first_bucket == second_bucket) { + return first.integer < second.integer; + } + + return first_bucket < second_bucket; + }); // // Lay out the string elements and record their positions. @@ -305,8 +324,8 @@ StringIntegerMap::StringIntegerMap( std::unordered_map string_element_byte_index_map; string_element_data_.resize(string_element_data_size + sizeof(std::uint64_t)); - auto *string_element = string_element_data_.data(); - for (auto &builder_element : builder_string_elements) { + auto* string_element = string_element_data_.data(); + for (auto& builder_element : builder_string_elements) { builder_element.element_offset = string_element - string_element_data_.data(); @@ -320,36 +339,41 @@ StringIntegerMap::StringIntegerMap( string_size_.write(string_element, builder_element.string.size()); *string_element = getSmallHash(builder_element.hash); string_element++; - std::memcpy(string_element, builder_element.string.data(), - builder_element.string.size()); + std::memcpy( + string_element, + builder_element.string.data(), + builder_element.string.size()); string_element += builder_element.string.size(); - assert(string_element >= string_element_data_.data() && - string_element <= - string_element_data_.data() + string_element_data_size); + assert( + string_element >= string_element_data_.data() && + string_element <= + string_element_data_.data() + string_element_data_size); } // // Lay out the integer elements. // - integer_element_data_.resize(integer_element_data_size + - sizeof(std::uint64_t)); - auto *integer_element = integer_element_data_.data(); - for (auto &builder_element : builder_integer_elements) { + integer_element_data_.resize( + integer_element_data_size + sizeof(std::uint64_t)); + auto* integer_element = integer_element_data_.data(); + for (auto& builder_element : builder_integer_elements) { builder_element.element_offset = integer_element - integer_element_data_.data(); auto string_element_byte_index_iter = string_element_byte_index_map.find(builder_element.string); - assert(string_element_byte_index_iter != - std::end(string_element_byte_index_map)); + assert( + string_element_byte_index_iter != + std::end(string_element_byte_index_map)); integer_element = integer_.write(integer_element, builder_element.integer); integer_element = string_size_.write(integer_element, builder_element.string.size()); integer_element = string_offset_.write( integer_element, string_element_byte_index_iter->second); - assert(integer_element >= integer_element_data_.data() && - integer_element <= - integer_element_data_.data() + integer_element_data_size); + assert( + integer_element >= integer_element_data_.data() && + integer_element <= + integer_element_data_.data() + integer_element_data_size); } // @@ -362,20 +386,20 @@ StringIntegerMap::StringIntegerMap( auto builder_integer_elements_iter = std::begin(builder_integer_elements); for (std::size_t bucket_idx = 0; bucket_idx < bucket_count_; ++bucket_idx) { - auto *string_bucket = string_bucket_data_.data() + - (bucket_idx * element_offset_.getByteCount()); + auto* string_bucket = string_bucket_data_.data() + + (bucket_idx * element_offset_.getByteCount()); if (builder_string_elements_iter != std::end(builder_string_elements)) { - element_offset_.write(string_bucket, - builder_string_elements_iter->element_offset); + element_offset_.write( + string_bucket, builder_string_elements_iter->element_offset); } else { element_offset_.write(string_bucket, string_element_data_size); } - auto *integer_bucket = integer_bucket_data_.data() + - (bucket_idx * element_offset_.getByteCount()); + auto* integer_bucket = integer_bucket_data_.data() + + (bucket_idx * element_offset_.getByteCount()); if (builder_integer_elements_iter != std::end(builder_integer_elements)) { - element_offset_.write(integer_bucket, - builder_integer_elements_iter->element_offset); + element_offset_.write( + integer_bucket, builder_integer_elements_iter->element_offset); } else { element_offset_.write(integer_bucket, integer_element_data_size); } @@ -414,7 +438,8 @@ StringIntegerMap::tryGetInteger( template bool StringIntegerMap::tryGetInteger( - std::string_view str, std::uint64_t &result) const { + std::string_view str, + std::uint64_t& result) const { if (size_ == 0) { return false; } @@ -423,8 +448,8 @@ bool StringIntegerMap::tryGetInteger( const auto bucket_index = hash % bucket_count_; const auto small_hash = getSmallHash(hash); - const auto *bucket_data = string_bucket_data_.data() + - (bucket_index * element_offset_.getByteCount()); + const auto* bucket_data = string_bucket_data_.data() + + (bucket_index * element_offset_.getByteCount()); const auto lower_element_offset = element_offset_.read(bucket_data); const auto upper_element_offset = element_offset_.read(bucket_data + element_offset_.getByteCount()); @@ -433,9 +458,10 @@ bool StringIntegerMap::tryGetInteger( const auto string_size_size = string_size_.getByteCount(); std::size_t element_size = 0; - auto *element_data_end = string_element_data_.data() + upper_element_offset; - for (auto *element_data = string_element_data_.data() + lower_element_offset; - element_data < element_data_end; element_data += element_size) { + auto* element_data_end = string_element_data_.data() + upper_element_offset; + for (auto* element_data = string_element_data_.data() + lower_element_offset; + element_data < element_data_end; + element_data += element_size) { // // Read the string length. // @@ -461,8 +487,8 @@ bool StringIntegerMap::tryGetInteger( // std::string_view element_string( - reinterpret_cast(element_data + integer_size + - string_size_size + 1), + reinterpret_cast( + element_data + integer_size + string_size_size + 1), element_string_length); if (str == element_string) { result = integer_.read(element_data); @@ -484,34 +510,35 @@ StringIntegerMap::tryGetString( template bool StringIntegerMap::tryGetString( - std::uint64_t integer, std::string_view &result) const { + std::uint64_t integer, + std::string_view& result) const { if (size_ == 0) { return false; } const auto bucket_index = getBucketIndex(integer); - const auto *bucket_data = integer_bucket_data_.data() + - (bucket_index * element_offset_.getByteCount()); + const auto* bucket_data = integer_bucket_data_.data() + + (bucket_index * element_offset_.getByteCount()); const auto lower_element_offset = element_offset_.read(bucket_data); const auto upper_element_offset = element_offset_.read(bucket_data + element_offset_.getByteCount()); const auto integer_element_size = integer_.getByteCount() + - string_offset_.getByteCount() + - string_size_.getByteCount(); - auto *element_data_end = integer_element_data_.data() + upper_element_offset; - for (auto *element_data = integer_element_data_.data() + lower_element_offset; - element_data < element_data_end; element_data += integer_element_size) { + string_offset_.getByteCount() + string_size_.getByteCount(); + auto* element_data_end = integer_element_data_.data() + upper_element_offset; + for (auto* element_data = integer_element_data_.data() + lower_element_offset; + element_data < element_data_end; + element_data += integer_element_size) { const auto element_integer = integer_.read(element_data); if (element_integer == integer) { const auto element_string_size = string_size_.read(element_data + integer_.getByteCount()); const auto element_string_offset = string_offset_.read( element_data + integer_.getByteCount() + string_size_.getByteCount()); - const auto *string_element = + const auto* string_element = string_element_data_.data() + element_string_offset; - const auto *string_data = reinterpret_cast( + const auto* string_data = reinterpret_cast( string_element + integer_.getByteCount() + string_size_.getByteCount() + 1); result = std::string_view(string_data, element_string_size); @@ -525,8 +552,8 @@ bool StringIntegerMap::tryGetString( } template -std::size_t -StringIntegerMap::size() const { +std::size_t StringIntegerMap::size() + const { return size_; } @@ -541,19 +568,19 @@ StringIntegerMap::getElement( const auto string_size_size = string_size_.getByteCount(); const auto element_size = integer_size + string_offset_size + string_size_size; - const auto *element_data = &integer_element_data_[index * element_size]; + const auto* element_data = &integer_element_data_[index * element_size]; const auto integer = integer_.read(element_data); element_data += integer_size; const auto string_size = string_size_.read(element_data); element_data += string_size_size; const auto string_offset = string_offset_.read(element_data); - const auto *string_data = &string_element_data_[string_offset + integer_size + - string_size_size + 1]; + const auto* string_data = + &string_element_data_ + [string_offset + integer_size + string_size_size + 1]; return std::make_pair( - std::string_view(reinterpret_cast(string_data), - string_size), + std::string_view(reinterpret_cast(string_data), string_size), integer); } @@ -579,9 +606,10 @@ StringIntegerMap::getSmallHash( return static_cast(hash >> shift); } -template , - typename TIntegerHash = std::hash, - typename TAllocator = std::allocator> +template < + typename TStringHash = std::hash, + typename TIntegerHash = std::hash, + typename TAllocator = std::allocator> struct StringIntegerMapTypeBuilder { using Map = StringIntegerMap; diff --git a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/tekken.h b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/tekken.h index 3e2aaec5f6..da8c5ebccc 100644 --- a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/tekken.h +++ b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/tekken.h @@ -27,7 +27,7 @@ namespace tokenizers { class Tekken : public detail::BPETokenizerBase { -public: + public: struct TekkenConfig { std::string pattern; size_t num_vocab_tokens; @@ -52,35 +52,39 @@ class Tekken : public detail::BPETokenizerBase { ~Tekken() override = default; // Load from tekken.json file - Error load(const std::string &tokenizer_path) override; + Error load(const std::string& tokenizer_path) override; // Support loading with explicit special tokens - Error - load_with_special_tokens(const std::string &tokenizer_path, - const std::vector &special_tokens); + Error load_with_special_tokens( + const std::string& tokenizer_path, + const std::vector& special_tokens); // Get the version string - const std::string &get_version() const { return _version; } + const std::string& get_version() const { + return _version; + } -protected: + protected: // Virtual methods from BPETokenizerBase - Error _encode(const std::string &input, std::vector &ret, - uint64_t &last_piece_token_len) const override; + Error _encode( + const std::string& input, + std::vector& ret, + uint64_t& last_piece_token_len) const override; - void _decode(const std::string &input, std::string &ret) const override; + void _decode(const std::string& input, std::string& ret) const override; -private: + private: // Parse the JSON configuration - Result _parse_config(const nlohmann::json &j) const; + Result _parse_config(const nlohmann::json& j) const; // Build token map from JSON vocab - Result - _load_vocab_from_json(const nlohmann::json &vocab_json, - size_t max_vocab) const; + Result _load_vocab_from_json( + const nlohmann::json& vocab_json, + size_t max_vocab) const; // Initialize special tokens (fills up to num_special_tokens slots) std::vector _initialize_special_tokens( - const std::vector &defined_tokens, + const std::vector& defined_tokens, size_t num_special_tokens) const; // Default Tekken pattern diff --git a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/tiktoken.h b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/tiktoken.h index 27cb639d6a..0e7c543848 100644 --- a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/tiktoken.h +++ b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/tiktoken.h @@ -31,53 +31,77 @@ static constexpr size_t kBOSTokenIndex = 0; static constexpr size_t kEOSTokenIndex = 1; class Tiktoken : public detail::BPETokenizerBase { -public: - explicit Tiktoken(std::string pattern, - std::unique_ptr> special_tokens, - size_t bos_token_index, size_t eos_token_index) + public: + explicit Tiktoken( + std::string pattern, + std::unique_ptr> special_tokens, + size_t bos_token_index, + size_t eos_token_index) : _pattern(std::move(pattern)), _special_tokens(std::move(special_tokens)), - _bos_token_index(bos_token_index), _eos_token_index(eos_token_index) { + _bos_token_index(bos_token_index), + _eos_token_index(eos_token_index) { if (_bos_token_index >= _special_tokens->size() || _eos_token_index >= _special_tokens->size()) { abort(); } } - explicit Tiktoken(std::string pattern, - const std::vector &special_tokens, - size_t bos_token_index, size_t eos_token_index) - : Tiktoken(pattern, - std::make_unique>(special_tokens), - bos_token_index, eos_token_index) {} - - explicit Tiktoken(const std::vector &special_tokens, - size_t bos_token_index, size_t eos_token_index) - : Tiktoken(_get_default_patern(), - std::make_unique>(special_tokens), - bos_token_index, eos_token_index) {} - - explicit Tiktoken(std::unique_ptr> special_tokens, - size_t bos_token_index, size_t eos_token_index) - : Tiktoken(_get_default_patern(), std::move(special_tokens), - bos_token_index, eos_token_index) {} + explicit Tiktoken( + std::string pattern, + const std::vector& special_tokens, + size_t bos_token_index, + size_t eos_token_index) + : Tiktoken( + pattern, + std::make_unique>(special_tokens), + bos_token_index, + eos_token_index) {} + + explicit Tiktoken( + const std::vector& special_tokens, + size_t bos_token_index, + size_t eos_token_index) + : Tiktoken( + _get_default_patern(), + std::make_unique>(special_tokens), + bos_token_index, + eos_token_index) {} + + explicit Tiktoken( + std::unique_ptr> special_tokens, + size_t bos_token_index, + size_t eos_token_index) + : Tiktoken( + _get_default_patern(), + std::move(special_tokens), + bos_token_index, + eos_token_index) {} explicit Tiktoken() : _pattern(_get_default_patern()), _special_tokens(_get_default_special_tokens()), - _bos_token_index(kBOSTokenIndex), _eos_token_index(kEOSTokenIndex) {}; + _bos_token_index(kBOSTokenIndex), + _eos_token_index(kEOSTokenIndex) {}; - Error load(const std::string &tokenizer_path) override; + Error load(const std::string& tokenizer_path) override; -private: + private: static inline std::unique_ptr> _get_default_special_tokens() { auto special_tokens = std::make_unique>(std::vector{ - "<|begin_of_text|>", "<|end_of_text|>", - "<|reserved_special_token_0|>", "<|reserved_special_token_1|>", - "<|finetune_right_pad_id|>", "<|step_id|>", "<|start_header_id|>", - "<|end_header_id|>", "<|eom_id|>", "<|eot_id|>", "<|python_tag|>"}); + "<|begin_of_text|>", + "<|end_of_text|>", + "<|reserved_special_token_0|>", + "<|reserved_special_token_1|>", + "<|finetune_right_pad_id|>", + "<|step_id|>", + "<|start_header_id|>", + "<|end_header_id|>", + "<|eom_id|>", + "<|eot_id|>", + "<|python_tag|>"}); // pad the rest of the special tokens with reserved tokens ssize_t reserved_special_token_num = 2; while (special_tokens->size() < kSpecialTokensSize) { @@ -93,10 +117,12 @@ class Tiktoken : public detail::BPETokenizerBase { return R"((?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+)"; } - Error _encode(const std::string &input, std::vector &ret, - uint64_t &last_piece_token_len) const override; + Error _encode( + const std::string& input, + std::vector& ret, + uint64_t& last_piece_token_len) const override; - void _decode(const std::string &input, std::string &ret) const override; + void _decode(const std::string& input, std::string& ret) const override; detail::TokenMap _build_special_token_map(ssize_t num_base_tokens) const; diff --git a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/token_decoder.h b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/token_decoder.h index 822f9d9675..0f6e3bc2c5 100644 --- a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/token_decoder.h +++ b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/token_decoder.h @@ -10,7 +10,6 @@ #pragma once // Standard -#include #include #include #include @@ -28,11 +27,10 @@ namespace tokenizers { * Base class for all token decoders */ class TokenDecoder { -public: + public: /* -- Types -- */ - /** Shared pointer type */ - typedef std::shared_ptr Ptr; + using Ptr = std::unique_ptr; /* -- Virtual Methods -- */ @@ -45,8 +43,8 @@ class TokenDecoder { * * @returns decoded: The decoded token string */ - virtual std::vector - decode(const std::vector &tokens) const = 0; + virtual std::vector decode( + const std::vector& tokens) const = 0; // virtual destructor virtual ~TokenDecoder() = default; @@ -56,18 +54,18 @@ class TokenDecoder { // -- Factory ------------------------------------------------------------------ // Helper macro to standardize addition of config member fields -#define TOKEN_DECODER_CONFIG_MEMBER(type, name) \ - std::optional name; \ - TokenDecoderConfig &set_##name(type arg) { \ - this->name = std::move(arg); \ - return *this; \ +#define TOKEN_DECODER_CONFIG_MEMBER(type, name) \ + std::optional name; \ + TokenDecoderConfig& set_##name(type arg) { \ + this->name = std::move(arg); \ + return *this; \ } /** * Factory and config class for creating a new TokenDecoder */ class TokenDecoderConfig { -public: + public: /** * The Type name string matching from decoders * https://github.com/huggingface/tokenizers/blob/main/tokenizers/src/decoders/mod.rs#L55 @@ -90,6 +88,18 @@ class TokenDecoderConfig { TOKEN_DECODER_CONFIG_MEMBER(std::string, wordpiece_prefix) TOKEN_DECODER_CONFIG_MEMBER(bool, wordpiece_cleanup) + // Parameters for Metaspace decoder + TOKEN_DECODER_CONFIG_MEMBER(std::string, metaspace_replacement) + TOKEN_DECODER_CONFIG_MEMBER(std::string, metaspace_prepend_scheme) + + // Parameters for BPE decoder + TOKEN_DECODER_CONFIG_MEMBER(std::string, bpe_suffix) + + // Parameters for CTC decoder + TOKEN_DECODER_CONFIG_MEMBER(std::string, ctc_pad_token) + TOKEN_DECODER_CONFIG_MEMBER(std::string, ctc_word_delimiter_token) + TOKEN_DECODER_CONFIG_MEMBER(bool, ctc_cleanup) + /*----------------*/ /* Public methods */ /*----------------*/ @@ -107,7 +117,7 @@ class TokenDecoderConfig { /** * Populate from a json config file */ - TokenDecoderConfig &parse_json(const nlohmann::json &json_config); + TokenDecoderConfig& parse_json(const nlohmann::json& json_config); }; // end class TokenDecoderConfig // -- ByteLevel ---------------------------------------------------------------- @@ -116,9 +126,9 @@ class TokenDecoderConfig { // https://github.com/huggingface/tokenizers/blob/main/tokenizers/src/pre_tokenizers/byte_level.rs class ByteLevelTokenDecoder : public TokenDecoder { -public: - std::vector - decode(const std::vector &tokens) const override; + public: + std::vector decode( + const std::vector& tokens) const override; }; // end class ByteLevelTokenDecoder @@ -126,13 +136,14 @@ class ByteLevelTokenDecoder : public TokenDecoder { // Replaces a pattern with a replacement string class ReplaceTokenDecoder : public TokenDecoder { -public: - explicit ReplaceTokenDecoder(const std::string &pattern, - const std::string &content); - std::vector - decode(const std::vector &tokens) const override; - -private: + public: + explicit ReplaceTokenDecoder( + const std::string& pattern, + const std::string& content); + std::vector decode( + const std::vector& tokens) const override; + + private: std::string pattern_; std::string content_; }; // end class ReplaceTokenDecoder @@ -141,9 +152,9 @@ class ReplaceTokenDecoder : public TokenDecoder { // Handles byte fallback decoding class ByteFallbackTokenDecoder : public TokenDecoder { -public: - std::vector - decode(const std::vector &tokens) const override; + public: + std::vector decode( + const std::vector& tokens) const override; }; // end class ByteFallbackTokenDecoder @@ -151,9 +162,9 @@ class ByteFallbackTokenDecoder : public TokenDecoder { // Fuses tokens together class FuseTokenDecoder : public TokenDecoder { -public: - std::vector - decode(const std::vector &tokens) const override; + public: + std::vector decode( + const std::vector& tokens) const override; }; // end class FuseTokenDecoder @@ -161,13 +172,15 @@ class FuseTokenDecoder : public TokenDecoder { // Strips characters from tokens class StripTokenDecoder : public TokenDecoder { -public: - explicit StripTokenDecoder(const std::string &content_str, size_t start, - size_t stop); - std::vector - decode(const std::vector &tokens) const override; - -private: + public: + explicit StripTokenDecoder( + const std::string& content_str, + size_t start, + size_t stop); + std::vector decode( + const std::vector& tokens) const override; + + private: uint32_t content_; size_t start_; size_t stop_; @@ -177,13 +190,14 @@ class StripTokenDecoder : public TokenDecoder { // Used for WordPiece decoding class WordPieceTokenDecoder : public TokenDecoder { -public: - explicit WordPieceTokenDecoder(std::string prefix = "##", - bool cleanup = true); - std::vector - decode(const std::vector &tokens) const override; - -private: + public: + explicit WordPieceTokenDecoder( + std::string prefix = "##", + bool cleanup = true); + std::vector decode( + const std::vector& tokens) const override; + + private: std::string prefix_; bool cleanup_; }; // end class WordPieceTokenDecoder @@ -192,13 +206,68 @@ class WordPieceTokenDecoder : public TokenDecoder { // Applies a sequence of decoders in order class SequenceTokenDecoder : public TokenDecoder { -public: + public: explicit SequenceTokenDecoder(std::vector decoders); - std::vector - decode(const std::vector &tokens) const override; + std::vector decode( + const std::vector& tokens) const override; -private: + private: std::vector decoders_; }; // end class SequenceTokenDecoder +// -- Metaspace ---------------------------------------------------------------- +// Replaces the replacement character (▁) with space and handles prepend. + +class MetaspaceTokenDecoder : public TokenDecoder { + public: + explicit MetaspaceTokenDecoder( + const std::string& replacement = "\xe2\x96\x81", + const std::string& prepend_scheme = "always") + : replacement_(replacement), prepend_scheme_(prepend_scheme) {} + + std::vector decode( + const std::vector& tokens) const override; + + private: + std::string replacement_; + std::string prepend_scheme_; +}; + +// -- BPE ---------------------------------------------------------------------- +// Handles end-of-word suffix (e.g. ). + +class BPETokenDecoder : public TokenDecoder { + public: + explicit BPETokenDecoder(const std::string& suffix = "") + : suffix_(suffix) {} + + std::vector decode( + const std::vector& tokens) const override; + + private: + std::string suffix_; +}; + +// -- CTC ---------------------------------------------------------------------- +// Connectionist Temporal Classification decoder for speech models. + +class CTCTokenDecoder : public TokenDecoder { + public: + explicit CTCTokenDecoder( + const std::string& pad_token = "", + const std::string& word_delimiter_token = "|", + bool cleanup = true) + : pad_token_(pad_token), + word_delimiter_token_(word_delimiter_token), + cleanup_(cleanup) {} + + std::vector decode( + const std::vector& tokens) const override; + + private: + std::string pad_token_; + std::string word_delimiter_token_; + bool cleanup_; +}; + } // namespace tokenizers diff --git a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/tokenizer.h b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/tokenizer.h index 708f86263d..75195a4bbf 100644 --- a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/tokenizer.h +++ b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/tokenizer.h @@ -21,16 +21,16 @@ namespace tokenizers { struct TokenIndex { - const char *str; + const char* str; int32_t id; }; class Tokenizer { -public: + public: explicit Tokenizer() {} virtual ~Tokenizer() {} - virtual Error load(const std::string &tokenizer_path) = 0; + virtual Error load(const std::string& tokenizer_path) = 0; /** * Returns the raw vocabulary piece for a given token id. @@ -60,7 +60,7 @@ class Tokenizer { * @return Result containing the token id, or an error if the piece is not * found in the vocabulary or the tokenizer is not initialized */ - virtual Result piece_to_id(const std::string &text) const = 0; + virtual Result piece_to_id(const std::string& text) const = 0; /** * Encode the input string into a vector of token IDs. @@ -74,22 +74,31 @@ class Tokenizer { * fails */ virtual Result> - encode(const std::string &input, int8_t bos = 0, int8_t eos = 0) const = 0; + encode(const std::string& input, int8_t bos = 0, int8_t eos = 0) const = 0; - virtual Result - decode(uint64_t prev_token, uint64_t token, - bool skip_special_tokens = false) const = 0; + virtual Result decode( + uint64_t prev_token, + uint64_t token, + bool skip_special_tokens = false) const = 0; // getters - int32_t vocab_size() const { return vocab_size_; } + int32_t vocab_size() const { + return vocab_size_; + } - uint64_t bos_tok() const { return bos_tok_; } + uint64_t bos_tok() const { + return bos_tok_; + } - uint64_t eos_tok() const { return eos_tok_; } + uint64_t eos_tok() const { + return eos_tok_; + } - virtual bool is_loaded() const { return initialized_; } + virtual bool is_loaded() const { + return initialized_; + } -protected: + protected: bool initialized_ = false; int32_t vocab_size_ = 0; uint64_t bos_tok_ = 0, eos_tok_ = 0, unk_tok_ = 0; diff --git a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/truncation.h b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/truncation.h index c6819cd266..4f16ee6f54 100644 --- a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/truncation.h +++ b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/truncation.h @@ -41,40 +41,41 @@ struct TruncationParams { }; class Truncation { -public: - /** Shared pointer type */ - typedef std::shared_ptr Ptr; + public: + using Ptr = std::unique_ptr; /** * @param params: The truncation parameters */ - explicit Truncation(const TruncationParams ¶ms); + explicit Truncation(const TruncationParams& params); /** * Truncate the tokens according to the configuration. - * + * * @param tokens The tokens to truncate. - * @param num_tokens_to_add The number of special tokens that will be added - * later. These are subtracted from max_length during truncation calculation. + * @param num_tokens_to_add The number of special tokens that will be added later. + * These are subtracted from max_length during truncation calculation. */ - std::vector truncate(std::vector tokens, - size_t num_tokens_to_add = 0) const; + std::vector truncate( + std::vector tokens, + size_t num_tokens_to_add = 0) const; /** * Truncate a pair of sequences according to the configuration. */ - std::pair, std::vector> - truncate_pair(std::vector a, std::vector b, - size_t num_tokens_to_add = 0) const; + std::pair, std::vector> truncate_pair( + std::vector a, + std::vector b, + size_t num_tokens_to_add = 0) const; -private: + private: TruncationParams params_; }; // -- Factory ------------------------------------------------------------------ class TruncationConfig { -public: + public: /** * Construct the truncation instance from the member data */ @@ -83,7 +84,7 @@ class TruncationConfig { /** * Populate from a json config file */ - TruncationConfig &parse_json(const nlohmann::json &json_config); + TruncationConfig& parse_json(const nlohmann::json& json_config); // Configuration members TruncationParams params; diff --git a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/unicode-nfc-data.h b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/unicode-nfc-data.h index 384c00c3cb..7e169db695 100644 --- a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/unicode-nfc-data.h +++ b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/unicode-nfc-data.h @@ -3,3169 +3,3164 @@ * Generated from UnicodeData.txt * * This file contains proper canonical decomposition and composition mappings - * that support multi-codepoint sequences, required for correct NFD/NFC - * normalization. + * that support multi-codepoint sequences, required for correct NFD/NFC normalization. */ #pragma once #include #include -#include #include +#include namespace tokenizers { // Canonical decomposition table: composed -> [base, combining_marks...] // Only includes canonical decompositions (not compatibility) -static const std::unordered_map> - nfd_decomposition_table = { - {0x00C0, {0x0041, 0x0300}}, // 'À' - {0x00C1, {0x0041, 0x0301}}, // 'Á' - {0x00C2, {0x0041, 0x0302}}, // 'Â' - {0x00C3, {0x0041, 0x0303}}, // 'Ã' - {0x00C4, {0x0041, 0x0308}}, // 'Ä' - {0x00C5, {0x0041, 0x030A}}, // 'Å' - {0x00C7, {0x0043, 0x0327}}, // 'Ç' - {0x00C8, {0x0045, 0x0300}}, // 'È' - {0x00C9, {0x0045, 0x0301}}, // 'É' - {0x00CA, {0x0045, 0x0302}}, // 'Ê' - {0x00CB, {0x0045, 0x0308}}, // 'Ë' - {0x00CC, {0x0049, 0x0300}}, // 'Ì' - {0x00CD, {0x0049, 0x0301}}, // 'Í' - {0x00CE, {0x0049, 0x0302}}, // 'Î' - {0x00CF, {0x0049, 0x0308}}, // 'Ï' - {0x00D1, {0x004E, 0x0303}}, // 'Ñ' - {0x00D2, {0x004F, 0x0300}}, // 'Ò' - {0x00D3, {0x004F, 0x0301}}, // 'Ó' - {0x00D4, {0x004F, 0x0302}}, // 'Ô' - {0x00D5, {0x004F, 0x0303}}, // 'Õ' - {0x00D6, {0x004F, 0x0308}}, // 'Ö' - {0x00D9, {0x0055, 0x0300}}, // 'Ù' - {0x00DA, {0x0055, 0x0301}}, // 'Ú' - {0x00DB, {0x0055, 0x0302}}, // 'Û' - {0x00DC, {0x0055, 0x0308}}, // 'Ü' - {0x00DD, {0x0059, 0x0301}}, // 'Ý' - {0x00E0, {0x0061, 0x0300}}, // 'à' - {0x00E1, {0x0061, 0x0301}}, // 'á' - {0x00E2, {0x0061, 0x0302}}, // 'â' - {0x00E3, {0x0061, 0x0303}}, // 'ã' - {0x00E4, {0x0061, 0x0308}}, // 'ä' - {0x00E5, {0x0061, 0x030A}}, // 'å' - {0x00E7, {0x0063, 0x0327}}, // 'ç' - {0x00E8, {0x0065, 0x0300}}, // 'è' - {0x00E9, {0x0065, 0x0301}}, // 'é' - {0x00EA, {0x0065, 0x0302}}, // 'ê' - {0x00EB, {0x0065, 0x0308}}, // 'ë' - {0x00EC, {0x0069, 0x0300}}, // 'ì' - {0x00ED, {0x0069, 0x0301}}, // 'í' - {0x00EE, {0x0069, 0x0302}}, // 'î' - {0x00EF, {0x0069, 0x0308}}, // 'ï' - {0x00F1, {0x006E, 0x0303}}, // 'ñ' - {0x00F2, {0x006F, 0x0300}}, // 'ò' - {0x00F3, {0x006F, 0x0301}}, // 'ó' - {0x00F4, {0x006F, 0x0302}}, // 'ô' - {0x00F5, {0x006F, 0x0303}}, // 'õ' - {0x00F6, {0x006F, 0x0308}}, // 'ö' - {0x00F9, {0x0075, 0x0300}}, // 'ù' - {0x00FA, {0x0075, 0x0301}}, // 'ú' - {0x00FB, {0x0075, 0x0302}}, // 'û' - {0x00FC, {0x0075, 0x0308}}, // 'ü' - {0x00FD, {0x0079, 0x0301}}, // 'ý' - {0x00FF, {0x0079, 0x0308}}, // 'ÿ' - {0x0100, {0x0041, 0x0304}}, // 'Ā' - {0x0101, {0x0061, 0x0304}}, // 'ā' - {0x0102, {0x0041, 0x0306}}, // 'Ă' - {0x0103, {0x0061, 0x0306}}, // 'ă' - {0x0104, {0x0041, 0x0328}}, // 'Ą' - {0x0105, {0x0061, 0x0328}}, // 'ą' - {0x0106, {0x0043, 0x0301}}, // 'Ć' - {0x0107, {0x0063, 0x0301}}, // 'ć' - {0x0108, {0x0043, 0x0302}}, // 'Ĉ' - {0x0109, {0x0063, 0x0302}}, // 'ĉ' - {0x010A, {0x0043, 0x0307}}, // 'Ċ' - {0x010B, {0x0063, 0x0307}}, // 'ċ' - {0x010C, {0x0043, 0x030C}}, // 'Č' - {0x010D, {0x0063, 0x030C}}, // 'č' - {0x010E, {0x0044, 0x030C}}, // 'Ď' - {0x010F, {0x0064, 0x030C}}, // 'ď' - {0x0112, {0x0045, 0x0304}}, // 'Ē' - {0x0113, {0x0065, 0x0304}}, // 'ē' - {0x0114, {0x0045, 0x0306}}, // 'Ĕ' - {0x0115, {0x0065, 0x0306}}, // 'ĕ' - {0x0116, {0x0045, 0x0307}}, // 'Ė' - {0x0117, {0x0065, 0x0307}}, // 'ė' - {0x0118, {0x0045, 0x0328}}, // 'Ę' - {0x0119, {0x0065, 0x0328}}, // 'ę' - {0x011A, {0x0045, 0x030C}}, // 'Ě' - {0x011B, {0x0065, 0x030C}}, // 'ě' - {0x011C, {0x0047, 0x0302}}, // 'Ĝ' - {0x011D, {0x0067, 0x0302}}, // 'ĝ' - {0x011E, {0x0047, 0x0306}}, // 'Ğ' - {0x011F, {0x0067, 0x0306}}, // 'ğ' - {0x0120, {0x0047, 0x0307}}, // 'Ġ' - {0x0121, {0x0067, 0x0307}}, // 'ġ' - {0x0122, {0x0047, 0x0327}}, // 'Ģ' - {0x0123, {0x0067, 0x0327}}, // 'ģ' - {0x0124, {0x0048, 0x0302}}, // 'Ĥ' - {0x0125, {0x0068, 0x0302}}, // 'ĥ' - {0x0128, {0x0049, 0x0303}}, // 'Ĩ' - {0x0129, {0x0069, 0x0303}}, // 'ĩ' - {0x012A, {0x0049, 0x0304}}, // 'Ī' - {0x012B, {0x0069, 0x0304}}, // 'ī' - {0x012C, {0x0049, 0x0306}}, // 'Ĭ' - {0x012D, {0x0069, 0x0306}}, // 'ĭ' - {0x012E, {0x0049, 0x0328}}, // 'Į' - {0x012F, {0x0069, 0x0328}}, // 'į' - {0x0130, {0x0049, 0x0307}}, // 'İ' - {0x0134, {0x004A, 0x0302}}, // 'Ĵ' - {0x0135, {0x006A, 0x0302}}, // 'ĵ' - {0x0136, {0x004B, 0x0327}}, // 'Ķ' - {0x0137, {0x006B, 0x0327}}, // 'ķ' - {0x0139, {0x004C, 0x0301}}, // 'Ĺ' - {0x013A, {0x006C, 0x0301}}, // 'ĺ' - {0x013B, {0x004C, 0x0327}}, // 'Ļ' - {0x013C, {0x006C, 0x0327}}, // 'ļ' - {0x013D, {0x004C, 0x030C}}, // 'Ľ' - {0x013E, {0x006C, 0x030C}}, // 'ľ' - {0x0143, {0x004E, 0x0301}}, // 'Ń' - {0x0144, {0x006E, 0x0301}}, // 'ń' - {0x0145, {0x004E, 0x0327}}, // 'Ņ' - {0x0146, {0x006E, 0x0327}}, // 'ņ' - {0x0147, {0x004E, 0x030C}}, // 'Ň' - {0x0148, {0x006E, 0x030C}}, // 'ň' - {0x014C, {0x004F, 0x0304}}, // 'Ō' - {0x014D, {0x006F, 0x0304}}, // 'ō' - {0x014E, {0x004F, 0x0306}}, // 'Ŏ' - {0x014F, {0x006F, 0x0306}}, // 'ŏ' - {0x0150, {0x004F, 0x030B}}, // 'Ő' - {0x0151, {0x006F, 0x030B}}, // 'ő' - {0x0154, {0x0052, 0x0301}}, // 'Ŕ' - {0x0155, {0x0072, 0x0301}}, // 'ŕ' - {0x0156, {0x0052, 0x0327}}, // 'Ŗ' - {0x0157, {0x0072, 0x0327}}, // 'ŗ' - {0x0158, {0x0052, 0x030C}}, // 'Ř' - {0x0159, {0x0072, 0x030C}}, // 'ř' - {0x015A, {0x0053, 0x0301}}, // 'Ś' - {0x015B, {0x0073, 0x0301}}, // 'ś' - {0x015C, {0x0053, 0x0302}}, // 'Ŝ' - {0x015D, {0x0073, 0x0302}}, // 'ŝ' - {0x015E, {0x0053, 0x0327}}, // 'Ş' - {0x015F, {0x0073, 0x0327}}, // 'ş' - {0x0160, {0x0053, 0x030C}}, // 'Š' - {0x0161, {0x0073, 0x030C}}, // 'š' - {0x0162, {0x0054, 0x0327}}, // 'Ţ' - {0x0163, {0x0074, 0x0327}}, // 'ţ' - {0x0164, {0x0054, 0x030C}}, // 'Ť' - {0x0165, {0x0074, 0x030C}}, // 'ť' - {0x0168, {0x0055, 0x0303}}, // 'Ũ' - {0x0169, {0x0075, 0x0303}}, // 'ũ' - {0x016A, {0x0055, 0x0304}}, // 'Ū' - {0x016B, {0x0075, 0x0304}}, // 'ū' - {0x016C, {0x0055, 0x0306}}, // 'Ŭ' - {0x016D, {0x0075, 0x0306}}, // 'ŭ' - {0x016E, {0x0055, 0x030A}}, // 'Ů' - {0x016F, {0x0075, 0x030A}}, // 'ů' - {0x0170, {0x0055, 0x030B}}, // 'Ű' - {0x0171, {0x0075, 0x030B}}, // 'ű' - {0x0172, {0x0055, 0x0328}}, // 'Ų' - {0x0173, {0x0075, 0x0328}}, // 'ų' - {0x0174, {0x0057, 0x0302}}, // 'Ŵ' - {0x0175, {0x0077, 0x0302}}, // 'ŵ' - {0x0176, {0x0059, 0x0302}}, // 'Ŷ' - {0x0177, {0x0079, 0x0302}}, // 'ŷ' - {0x0178, {0x0059, 0x0308}}, // 'Ÿ' - {0x0179, {0x005A, 0x0301}}, // 'Ź' - {0x017A, {0x007A, 0x0301}}, // 'ź' - {0x017B, {0x005A, 0x0307}}, // 'Ż' - {0x017C, {0x007A, 0x0307}}, // 'ż' - {0x017D, {0x005A, 0x030C}}, // 'Ž' - {0x017E, {0x007A, 0x030C}}, // 'ž' - {0x01A0, {0x004F, 0x031B}}, // 'Ơ' - {0x01A1, {0x006F, 0x031B}}, // 'ơ' - {0x01AF, {0x0055, 0x031B}}, // 'Ư' - {0x01B0, {0x0075, 0x031B}}, // 'ư' - {0x01CD, {0x0041, 0x030C}}, // 'Ǎ' - {0x01CE, {0x0061, 0x030C}}, // 'ǎ' - {0x01CF, {0x0049, 0x030C}}, // 'Ǐ' - {0x01D0, {0x0069, 0x030C}}, // 'ǐ' - {0x01D1, {0x004F, 0x030C}}, // 'Ǒ' - {0x01D2, {0x006F, 0x030C}}, // 'ǒ' - {0x01D3, {0x0055, 0x030C}}, // 'Ǔ' - {0x01D4, {0x0075, 0x030C}}, // 'ǔ' - {0x01D5, {0x0055, 0x0308, 0x0304}}, // 'Ǖ' - {0x01D6, {0x0075, 0x0308, 0x0304}}, // 'ǖ' - {0x01D7, {0x0055, 0x0308, 0x0301}}, // 'Ǘ' - {0x01D8, {0x0075, 0x0308, 0x0301}}, // 'ǘ' - {0x01D9, {0x0055, 0x0308, 0x030C}}, // 'Ǚ' - {0x01DA, {0x0075, 0x0308, 0x030C}}, // 'ǚ' - {0x01DB, {0x0055, 0x0308, 0x0300}}, // 'Ǜ' - {0x01DC, {0x0075, 0x0308, 0x0300}}, // 'ǜ' - {0x01DE, {0x0041, 0x0308, 0x0304}}, // 'Ǟ' - {0x01DF, {0x0061, 0x0308, 0x0304}}, // 'ǟ' - {0x01E0, {0x0041, 0x0307, 0x0304}}, // 'Ǡ' - {0x01E1, {0x0061, 0x0307, 0x0304}}, // 'ǡ' - {0x01E2, {0x00C6, 0x0304}}, // 'Ǣ' - {0x01E3, {0x00E6, 0x0304}}, // 'ǣ' - {0x01E6, {0x0047, 0x030C}}, // 'Ǧ' - {0x01E7, {0x0067, 0x030C}}, // 'ǧ' - {0x01E8, {0x004B, 0x030C}}, // 'Ǩ' - {0x01E9, {0x006B, 0x030C}}, // 'ǩ' - {0x01EA, {0x004F, 0x0328}}, // 'Ǫ' - {0x01EB, {0x006F, 0x0328}}, // 'ǫ' - {0x01EC, {0x004F, 0x0328, 0x0304}}, // 'Ǭ' - {0x01ED, {0x006F, 0x0328, 0x0304}}, // 'ǭ' - {0x01EE, {0x01B7, 0x030C}}, // 'Ǯ' - {0x01EF, {0x0292, 0x030C}}, // 'ǯ' - {0x01F0, {0x006A, 0x030C}}, // 'ǰ' - {0x01F4, {0x0047, 0x0301}}, // 'Ǵ' - {0x01F5, {0x0067, 0x0301}}, // 'ǵ' - {0x01F8, {0x004E, 0x0300}}, // 'Ǹ' - {0x01F9, {0x006E, 0x0300}}, // 'ǹ' - {0x01FA, {0x0041, 0x030A, 0x0301}}, // 'Ǻ' - {0x01FB, {0x0061, 0x030A, 0x0301}}, // 'ǻ' - {0x01FC, {0x00C6, 0x0301}}, // 'Ǽ' - {0x01FD, {0x00E6, 0x0301}}, // 'ǽ' - {0x01FE, {0x00D8, 0x0301}}, // 'Ǿ' - {0x01FF, {0x00F8, 0x0301}}, // 'ǿ' - {0x0200, {0x0041, 0x030F}}, // 'Ȁ' - {0x0201, {0x0061, 0x030F}}, // 'ȁ' - {0x0202, {0x0041, 0x0311}}, // 'Ȃ' - {0x0203, {0x0061, 0x0311}}, // 'ȃ' - {0x0204, {0x0045, 0x030F}}, // 'Ȅ' - {0x0205, {0x0065, 0x030F}}, // 'ȅ' - {0x0206, {0x0045, 0x0311}}, // 'Ȇ' - {0x0207, {0x0065, 0x0311}}, // 'ȇ' - {0x0208, {0x0049, 0x030F}}, // 'Ȉ' - {0x0209, {0x0069, 0x030F}}, // 'ȉ' - {0x020A, {0x0049, 0x0311}}, // 'Ȋ' - {0x020B, {0x0069, 0x0311}}, // 'ȋ' - {0x020C, {0x004F, 0x030F}}, // 'Ȍ' - {0x020D, {0x006F, 0x030F}}, // 'ȍ' - {0x020E, {0x004F, 0x0311}}, // 'Ȏ' - {0x020F, {0x006F, 0x0311}}, // 'ȏ' - {0x0210, {0x0052, 0x030F}}, // 'Ȑ' - {0x0211, {0x0072, 0x030F}}, // 'ȑ' - {0x0212, {0x0052, 0x0311}}, // 'Ȓ' - {0x0213, {0x0072, 0x0311}}, // 'ȓ' - {0x0214, {0x0055, 0x030F}}, // 'Ȕ' - {0x0215, {0x0075, 0x030F}}, // 'ȕ' - {0x0216, {0x0055, 0x0311}}, // 'Ȗ' - {0x0217, {0x0075, 0x0311}}, // 'ȗ' - {0x0218, {0x0053, 0x0326}}, // 'Ș' - {0x0219, {0x0073, 0x0326}}, // 'ș' - {0x021A, {0x0054, 0x0326}}, // 'Ț' - {0x021B, {0x0074, 0x0326}}, // 'ț' - {0x021E, {0x0048, 0x030C}}, // 'Ȟ' - {0x021F, {0x0068, 0x030C}}, // 'ȟ' - {0x0226, {0x0041, 0x0307}}, // 'Ȧ' - {0x0227, {0x0061, 0x0307}}, // 'ȧ' - {0x0228, {0x0045, 0x0327}}, // 'Ȩ' - {0x0229, {0x0065, 0x0327}}, // 'ȩ' - {0x022A, {0x004F, 0x0308, 0x0304}}, // 'Ȫ' - {0x022B, {0x006F, 0x0308, 0x0304}}, // 'ȫ' - {0x022C, {0x004F, 0x0303, 0x0304}}, // 'Ȭ' - {0x022D, {0x006F, 0x0303, 0x0304}}, // 'ȭ' - {0x022E, {0x004F, 0x0307}}, // 'Ȯ' - {0x022F, {0x006F, 0x0307}}, // 'ȯ' - {0x0230, {0x004F, 0x0307, 0x0304}}, // 'Ȱ' - {0x0231, {0x006F, 0x0307, 0x0304}}, // 'ȱ' - {0x0232, {0x0059, 0x0304}}, // 'Ȳ' - {0x0233, {0x0079, 0x0304}}, // 'ȳ' - {0x0340, {0x0300}}, // '̀' - {0x0341, {0x0301}}, // '́' - {0x0343, {0x0313}}, // '̓' - {0x0344, {0x0308, 0x0301}}, // '̈́' - {0x0374, {0x02B9}}, // 'ʹ' - {0x037E, {0x003B}}, // ';' - {0x0385, {0x00A8, 0x0301}}, // '΅' - {0x0386, {0x0391, 0x0301}}, // 'Ά' - {0x0387, {0x00B7}}, // '·' - {0x0388, {0x0395, 0x0301}}, // 'Έ' - {0x0389, {0x0397, 0x0301}}, // 'Ή' - {0x038A, {0x0399, 0x0301}}, // 'Ί' - {0x038C, {0x039F, 0x0301}}, // 'Ό' - {0x038E, {0x03A5, 0x0301}}, // 'Ύ' - {0x038F, {0x03A9, 0x0301}}, // 'Ώ' - {0x0390, {0x03B9, 0x0308, 0x0301}}, // 'ΐ' - {0x03AA, {0x0399, 0x0308}}, // 'Ϊ' - {0x03AB, {0x03A5, 0x0308}}, // 'Ϋ' - {0x03AC, {0x03B1, 0x0301}}, // 'ά' - {0x03AD, {0x03B5, 0x0301}}, // 'έ' - {0x03AE, {0x03B7, 0x0301}}, // 'ή' - {0x03AF, {0x03B9, 0x0301}}, // 'ί' - {0x03B0, {0x03C5, 0x0308, 0x0301}}, // 'ΰ' - {0x03CA, {0x03B9, 0x0308}}, // 'ϊ' - {0x03CB, {0x03C5, 0x0308}}, // 'ϋ' - {0x03CC, {0x03BF, 0x0301}}, // 'ό' - {0x03CD, {0x03C5, 0x0301}}, // 'ύ' - {0x03CE, {0x03C9, 0x0301}}, // 'ώ' - {0x03D3, {0x03D2, 0x0301}}, // 'ϓ' - {0x03D4, {0x03D2, 0x0308}}, // 'ϔ' - {0x0400, {0x0415, 0x0300}}, // 'Ѐ' - {0x0401, {0x0415, 0x0308}}, // 'Ё' - {0x0403, {0x0413, 0x0301}}, // 'Ѓ' - {0x0407, {0x0406, 0x0308}}, // 'Ї' - {0x040C, {0x041A, 0x0301}}, // 'Ќ' - {0x040D, {0x0418, 0x0300}}, // 'Ѝ' - {0x040E, {0x0423, 0x0306}}, // 'Ў' - {0x0419, {0x0418, 0x0306}}, // 'Й' - {0x0439, {0x0438, 0x0306}}, // 'й' - {0x0450, {0x0435, 0x0300}}, // 'ѐ' - {0x0451, {0x0435, 0x0308}}, // 'ё' - {0x0453, {0x0433, 0x0301}}, // 'ѓ' - {0x0457, {0x0456, 0x0308}}, // 'ї' - {0x045C, {0x043A, 0x0301}}, // 'ќ' - {0x045D, {0x0438, 0x0300}}, // 'ѝ' - {0x045E, {0x0443, 0x0306}}, // 'ў' - {0x0476, {0x0474, 0x030F}}, // 'Ѷ' - {0x0477, {0x0475, 0x030F}}, // 'ѷ' - {0x04C1, {0x0416, 0x0306}}, // 'Ӂ' - {0x04C2, {0x0436, 0x0306}}, // 'ӂ' - {0x04D0, {0x0410, 0x0306}}, // 'Ӑ' - {0x04D1, {0x0430, 0x0306}}, // 'ӑ' - {0x04D2, {0x0410, 0x0308}}, // 'Ӓ' - {0x04D3, {0x0430, 0x0308}}, // 'ӓ' - {0x04D6, {0x0415, 0x0306}}, // 'Ӗ' - {0x04D7, {0x0435, 0x0306}}, // 'ӗ' - {0x04DA, {0x04D8, 0x0308}}, // 'Ӛ' - {0x04DB, {0x04D9, 0x0308}}, // 'ӛ' - {0x04DC, {0x0416, 0x0308}}, // 'Ӝ' - {0x04DD, {0x0436, 0x0308}}, // 'ӝ' - {0x04DE, {0x0417, 0x0308}}, // 'Ӟ' - {0x04DF, {0x0437, 0x0308}}, // 'ӟ' - {0x04E2, {0x0418, 0x0304}}, // 'Ӣ' - {0x04E3, {0x0438, 0x0304}}, // 'ӣ' - {0x04E4, {0x0418, 0x0308}}, // 'Ӥ' - {0x04E5, {0x0438, 0x0308}}, // 'ӥ' - {0x04E6, {0x041E, 0x0308}}, // 'Ӧ' - {0x04E7, {0x043E, 0x0308}}, // 'ӧ' - {0x04EA, {0x04E8, 0x0308}}, // 'Ӫ' - {0x04EB, {0x04E9, 0x0308}}, // 'ӫ' - {0x04EC, {0x042D, 0x0308}}, // 'Ӭ' - {0x04ED, {0x044D, 0x0308}}, // 'ӭ' - {0x04EE, {0x0423, 0x0304}}, // 'Ӯ' - {0x04EF, {0x0443, 0x0304}}, // 'ӯ' - {0x04F0, {0x0423, 0x0308}}, // 'Ӱ' - {0x04F1, {0x0443, 0x0308}}, // 'ӱ' - {0x04F2, {0x0423, 0x030B}}, // 'Ӳ' - {0x04F3, {0x0443, 0x030B}}, // 'ӳ' - {0x04F4, {0x0427, 0x0308}}, // 'Ӵ' - {0x04F5, {0x0447, 0x0308}}, // 'ӵ' - {0x04F8, {0x042B, 0x0308}}, // 'Ӹ' - {0x04F9, {0x044B, 0x0308}}, // 'ӹ' - {0x0622, {0x0627, 0x0653}}, // 'آ' - {0x0623, {0x0627, 0x0654}}, // 'أ' - {0x0624, {0x0648, 0x0654}}, // 'ؤ' - {0x0625, {0x0627, 0x0655}}, // 'إ' - {0x0626, {0x064A, 0x0654}}, // 'ئ' - {0x06C0, {0x06D5, 0x0654}}, // 'ۀ' - {0x06C2, {0x06C1, 0x0654}}, // 'ۂ' - {0x06D3, {0x06D2, 0x0654}}, // 'ۓ' - {0x0929, {0x0928, 0x093C}}, // 'ऩ' - {0x0931, {0x0930, 0x093C}}, // 'ऱ' - {0x0934, {0x0933, 0x093C}}, // 'ऴ' - {0x0958, {0x0915, 0x093C}}, // 'क़' - {0x0959, {0x0916, 0x093C}}, // 'ख़' - {0x095A, {0x0917, 0x093C}}, // 'ग़' - {0x095B, {0x091C, 0x093C}}, // 'ज़' - {0x095C, {0x0921, 0x093C}}, // 'ड़' - {0x095D, {0x0922, 0x093C}}, // 'ढ़' - {0x095E, {0x092B, 0x093C}}, // 'फ़' - {0x095F, {0x092F, 0x093C}}, // 'य़' - {0x09CB, {0x09C7, 0x09BE}}, // 'ো' - {0x09CC, {0x09C7, 0x09D7}}, // 'ৌ' - {0x09DC, {0x09A1, 0x09BC}}, // 'ড়' - {0x09DD, {0x09A2, 0x09BC}}, // 'ঢ়' - {0x09DF, {0x09AF, 0x09BC}}, // 'য়' - {0x0A33, {0x0A32, 0x0A3C}}, // 'ਲ਼' - {0x0A36, {0x0A38, 0x0A3C}}, // 'ਸ਼' - {0x0A59, {0x0A16, 0x0A3C}}, // 'ਖ਼' - {0x0A5A, {0x0A17, 0x0A3C}}, // 'ਗ਼' - {0x0A5B, {0x0A1C, 0x0A3C}}, // 'ਜ਼' - {0x0A5E, {0x0A2B, 0x0A3C}}, // 'ਫ਼' - {0x0B48, {0x0B47, 0x0B56}}, // 'ୈ' - {0x0B4B, {0x0B47, 0x0B3E}}, // 'ୋ' - {0x0B4C, {0x0B47, 0x0B57}}, // 'ୌ' - {0x0B5C, {0x0B21, 0x0B3C}}, // 'ଡ଼' - {0x0B5D, {0x0B22, 0x0B3C}}, // 'ଢ଼' - {0x0B94, {0x0B92, 0x0BD7}}, // 'ஔ' - {0x0BCA, {0x0BC6, 0x0BBE}}, // 'ொ' - {0x0BCB, {0x0BC7, 0x0BBE}}, // 'ோ' - {0x0BCC, {0x0BC6, 0x0BD7}}, // 'ௌ' - {0x0C48, {0x0C46, 0x0C56}}, // 'ై' - {0x0CC0, {0x0CBF, 0x0CD5}}, // 'ೀ' - {0x0CC7, {0x0CC6, 0x0CD5}}, // 'ೇ' - {0x0CC8, {0x0CC6, 0x0CD6}}, // 'ೈ' - {0x0CCA, {0x0CC6, 0x0CC2}}, // 'ೊ' - {0x0CCB, {0x0CC6, 0x0CC2, 0x0CD5}}, // 'ೋ' - {0x0D4A, {0x0D46, 0x0D3E}}, // 'ൊ' - {0x0D4B, {0x0D47, 0x0D3E}}, // 'ോ' - {0x0D4C, {0x0D46, 0x0D57}}, // 'ൌ' - {0x0DDA, {0x0DD9, 0x0DCA}}, // 'ේ' - {0x0DDC, {0x0DD9, 0x0DCF}}, // 'ො' - {0x0DDD, {0x0DD9, 0x0DCF, 0x0DCA}}, // 'ෝ' - {0x0DDE, {0x0DD9, 0x0DDF}}, // 'ෞ' - {0x0F43, {0x0F42, 0x0FB7}}, // 'གྷ' - {0x0F4D, {0x0F4C, 0x0FB7}}, // 'ཌྷ' - {0x0F52, {0x0F51, 0x0FB7}}, // 'དྷ' - {0x0F57, {0x0F56, 0x0FB7}}, // 'བྷ' - {0x0F5C, {0x0F5B, 0x0FB7}}, // 'ཛྷ' - {0x0F69, {0x0F40, 0x0FB5}}, // 'ཀྵ' - {0x0F73, {0x0F71, 0x0F72}}, // 'ཱི' - {0x0F75, {0x0F71, 0x0F74}}, // 'ཱུ' - {0x0F76, {0x0FB2, 0x0F80}}, // 'ྲྀ' - {0x0F78, {0x0FB3, 0x0F80}}, // 'ླྀ' - {0x0F81, {0x0F71, 0x0F80}}, // 'ཱྀ' - {0x0F93, {0x0F92, 0x0FB7}}, // 'ྒྷ' - {0x0F9D, {0x0F9C, 0x0FB7}}, // 'ྜྷ' - {0x0FA2, {0x0FA1, 0x0FB7}}, // 'ྡྷ' - {0x0FA7, {0x0FA6, 0x0FB7}}, // 'ྦྷ' - {0x0FAC, {0x0FAB, 0x0FB7}}, // 'ྫྷ' - {0x0FB9, {0x0F90, 0x0FB5}}, // 'ྐྵ' - {0x1026, {0x1025, 0x102E}}, // 'ဦ' - {0x1B06, {0x1B05, 0x1B35}}, // 'ᬆ' - {0x1B08, {0x1B07, 0x1B35}}, // 'ᬈ' - {0x1B0A, {0x1B09, 0x1B35}}, // 'ᬊ' - {0x1B0C, {0x1B0B, 0x1B35}}, // 'ᬌ' - {0x1B0E, {0x1B0D, 0x1B35}}, // 'ᬎ' - {0x1B12, {0x1B11, 0x1B35}}, // 'ᬒ' - {0x1B3B, {0x1B3A, 0x1B35}}, // 'ᬻ' - {0x1B3D, {0x1B3C, 0x1B35}}, // 'ᬽ' - {0x1B40, {0x1B3E, 0x1B35}}, // 'ᭀ' - {0x1B41, {0x1B3F, 0x1B35}}, // 'ᭁ' - {0x1B43, {0x1B42, 0x1B35}}, // 'ᭃ' - {0x1E00, {0x0041, 0x0325}}, // 'Ḁ' - {0x1E01, {0x0061, 0x0325}}, // 'ḁ' - {0x1E02, {0x0042, 0x0307}}, // 'Ḃ' - {0x1E03, {0x0062, 0x0307}}, // 'ḃ' - {0x1E04, {0x0042, 0x0323}}, // 'Ḅ' - {0x1E05, {0x0062, 0x0323}}, // 'ḅ' - {0x1E06, {0x0042, 0x0331}}, // 'Ḇ' - {0x1E07, {0x0062, 0x0331}}, // 'ḇ' - {0x1E08, {0x0043, 0x0327, 0x0301}}, // 'Ḉ' - {0x1E09, {0x0063, 0x0327, 0x0301}}, // 'ḉ' - {0x1E0A, {0x0044, 0x0307}}, // 'Ḋ' - {0x1E0B, {0x0064, 0x0307}}, // 'ḋ' - {0x1E0C, {0x0044, 0x0323}}, // 'Ḍ' - {0x1E0D, {0x0064, 0x0323}}, // 'ḍ' - {0x1E0E, {0x0044, 0x0331}}, // 'Ḏ' - {0x1E0F, {0x0064, 0x0331}}, // 'ḏ' - {0x1E10, {0x0044, 0x0327}}, // 'Ḑ' - {0x1E11, {0x0064, 0x0327}}, // 'ḑ' - {0x1E12, {0x0044, 0x032D}}, // 'Ḓ' - {0x1E13, {0x0064, 0x032D}}, // 'ḓ' - {0x1E14, {0x0045, 0x0304, 0x0300}}, // 'Ḕ' - {0x1E15, {0x0065, 0x0304, 0x0300}}, // 'ḕ' - {0x1E16, {0x0045, 0x0304, 0x0301}}, // 'Ḗ' - {0x1E17, {0x0065, 0x0304, 0x0301}}, // 'ḗ' - {0x1E18, {0x0045, 0x032D}}, // 'Ḙ' - {0x1E19, {0x0065, 0x032D}}, // 'ḙ' - {0x1E1A, {0x0045, 0x0330}}, // 'Ḛ' - {0x1E1B, {0x0065, 0x0330}}, // 'ḛ' - {0x1E1C, {0x0045, 0x0327, 0x0306}}, // 'Ḝ' - {0x1E1D, {0x0065, 0x0327, 0x0306}}, // 'ḝ' - {0x1E1E, {0x0046, 0x0307}}, // 'Ḟ' - {0x1E1F, {0x0066, 0x0307}}, // 'ḟ' - {0x1E20, {0x0047, 0x0304}}, // 'Ḡ' - {0x1E21, {0x0067, 0x0304}}, // 'ḡ' - {0x1E22, {0x0048, 0x0307}}, // 'Ḣ' - {0x1E23, {0x0068, 0x0307}}, // 'ḣ' - {0x1E24, {0x0048, 0x0323}}, // 'Ḥ' - {0x1E25, {0x0068, 0x0323}}, // 'ḥ' - {0x1E26, {0x0048, 0x0308}}, // 'Ḧ' - {0x1E27, {0x0068, 0x0308}}, // 'ḧ' - {0x1E28, {0x0048, 0x0327}}, // 'Ḩ' - {0x1E29, {0x0068, 0x0327}}, // 'ḩ' - {0x1E2A, {0x0048, 0x032E}}, // 'Ḫ' - {0x1E2B, {0x0068, 0x032E}}, // 'ḫ' - {0x1E2C, {0x0049, 0x0330}}, // 'Ḭ' - {0x1E2D, {0x0069, 0x0330}}, // 'ḭ' - {0x1E2E, {0x0049, 0x0308, 0x0301}}, // 'Ḯ' - {0x1E2F, {0x0069, 0x0308, 0x0301}}, // 'ḯ' - {0x1E30, {0x004B, 0x0301}}, // 'Ḱ' - {0x1E31, {0x006B, 0x0301}}, // 'ḱ' - {0x1E32, {0x004B, 0x0323}}, // 'Ḳ' - {0x1E33, {0x006B, 0x0323}}, // 'ḳ' - {0x1E34, {0x004B, 0x0331}}, // 'Ḵ' - {0x1E35, {0x006B, 0x0331}}, // 'ḵ' - {0x1E36, {0x004C, 0x0323}}, // 'Ḷ' - {0x1E37, {0x006C, 0x0323}}, // 'ḷ' - {0x1E38, {0x004C, 0x0323, 0x0304}}, // 'Ḹ' - {0x1E39, {0x006C, 0x0323, 0x0304}}, // 'ḹ' - {0x1E3A, {0x004C, 0x0331}}, // 'Ḻ' - {0x1E3B, {0x006C, 0x0331}}, // 'ḻ' - {0x1E3C, {0x004C, 0x032D}}, // 'Ḽ' - {0x1E3D, {0x006C, 0x032D}}, // 'ḽ' - {0x1E3E, {0x004D, 0x0301}}, // 'Ḿ' - {0x1E3F, {0x006D, 0x0301}}, // 'ḿ' - {0x1E40, {0x004D, 0x0307}}, // 'Ṁ' - {0x1E41, {0x006D, 0x0307}}, // 'ṁ' - {0x1E42, {0x004D, 0x0323}}, // 'Ṃ' - {0x1E43, {0x006D, 0x0323}}, // 'ṃ' - {0x1E44, {0x004E, 0x0307}}, // 'Ṅ' - {0x1E45, {0x006E, 0x0307}}, // 'ṅ' - {0x1E46, {0x004E, 0x0323}}, // 'Ṇ' - {0x1E47, {0x006E, 0x0323}}, // 'ṇ' - {0x1E48, {0x004E, 0x0331}}, // 'Ṉ' - {0x1E49, {0x006E, 0x0331}}, // 'ṉ' - {0x1E4A, {0x004E, 0x032D}}, // 'Ṋ' - {0x1E4B, {0x006E, 0x032D}}, // 'ṋ' - {0x1E4C, {0x004F, 0x0303, 0x0301}}, // 'Ṍ' - {0x1E4D, {0x006F, 0x0303, 0x0301}}, // 'ṍ' - {0x1E4E, {0x004F, 0x0303, 0x0308}}, // 'Ṏ' - {0x1E4F, {0x006F, 0x0303, 0x0308}}, // 'ṏ' - {0x1E50, {0x004F, 0x0304, 0x0300}}, // 'Ṑ' - {0x1E51, {0x006F, 0x0304, 0x0300}}, // 'ṑ' - {0x1E52, {0x004F, 0x0304, 0x0301}}, // 'Ṓ' - {0x1E53, {0x006F, 0x0304, 0x0301}}, // 'ṓ' - {0x1E54, {0x0050, 0x0301}}, // 'Ṕ' - {0x1E55, {0x0070, 0x0301}}, // 'ṕ' - {0x1E56, {0x0050, 0x0307}}, // 'Ṗ' - {0x1E57, {0x0070, 0x0307}}, // 'ṗ' - {0x1E58, {0x0052, 0x0307}}, // 'Ṙ' - {0x1E59, {0x0072, 0x0307}}, // 'ṙ' - {0x1E5A, {0x0052, 0x0323}}, // 'Ṛ' - {0x1E5B, {0x0072, 0x0323}}, // 'ṛ' - {0x1E5C, {0x0052, 0x0323, 0x0304}}, // 'Ṝ' - {0x1E5D, {0x0072, 0x0323, 0x0304}}, // 'ṝ' - {0x1E5E, {0x0052, 0x0331}}, // 'Ṟ' - {0x1E5F, {0x0072, 0x0331}}, // 'ṟ' - {0x1E60, {0x0053, 0x0307}}, // 'Ṡ' - {0x1E61, {0x0073, 0x0307}}, // 'ṡ' - {0x1E62, {0x0053, 0x0323}}, // 'Ṣ' - {0x1E63, {0x0073, 0x0323}}, // 'ṣ' - {0x1E64, {0x0053, 0x0301, 0x0307}}, // 'Ṥ' - {0x1E65, {0x0073, 0x0301, 0x0307}}, // 'ṥ' - {0x1E66, {0x0053, 0x030C, 0x0307}}, // 'Ṧ' - {0x1E67, {0x0073, 0x030C, 0x0307}}, // 'ṧ' - {0x1E68, {0x0053, 0x0323, 0x0307}}, // 'Ṩ' - {0x1E69, {0x0073, 0x0323, 0x0307}}, // 'ṩ' - {0x1E6A, {0x0054, 0x0307}}, // 'Ṫ' - {0x1E6B, {0x0074, 0x0307}}, // 'ṫ' - {0x1E6C, {0x0054, 0x0323}}, // 'Ṭ' - {0x1E6D, {0x0074, 0x0323}}, // 'ṭ' - {0x1E6E, {0x0054, 0x0331}}, // 'Ṯ' - {0x1E6F, {0x0074, 0x0331}}, // 'ṯ' - {0x1E70, {0x0054, 0x032D}}, // 'Ṱ' - {0x1E71, {0x0074, 0x032D}}, // 'ṱ' - {0x1E72, {0x0055, 0x0324}}, // 'Ṳ' - {0x1E73, {0x0075, 0x0324}}, // 'ṳ' - {0x1E74, {0x0055, 0x0330}}, // 'Ṵ' - {0x1E75, {0x0075, 0x0330}}, // 'ṵ' - {0x1E76, {0x0055, 0x032D}}, // 'Ṷ' - {0x1E77, {0x0075, 0x032D}}, // 'ṷ' - {0x1E78, {0x0055, 0x0303, 0x0301}}, // 'Ṹ' - {0x1E79, {0x0075, 0x0303, 0x0301}}, // 'ṹ' - {0x1E7A, {0x0055, 0x0304, 0x0308}}, // 'Ṻ' - {0x1E7B, {0x0075, 0x0304, 0x0308}}, // 'ṻ' - {0x1E7C, {0x0056, 0x0303}}, // 'Ṽ' - {0x1E7D, {0x0076, 0x0303}}, // 'ṽ' - {0x1E7E, {0x0056, 0x0323}}, // 'Ṿ' - {0x1E7F, {0x0076, 0x0323}}, // 'ṿ' - {0x1E80, {0x0057, 0x0300}}, // 'Ẁ' - {0x1E81, {0x0077, 0x0300}}, // 'ẁ' - {0x1E82, {0x0057, 0x0301}}, // 'Ẃ' - {0x1E83, {0x0077, 0x0301}}, // 'ẃ' - {0x1E84, {0x0057, 0x0308}}, // 'Ẅ' - {0x1E85, {0x0077, 0x0308}}, // 'ẅ' - {0x1E86, {0x0057, 0x0307}}, // 'Ẇ' - {0x1E87, {0x0077, 0x0307}}, // 'ẇ' - {0x1E88, {0x0057, 0x0323}}, // 'Ẉ' - {0x1E89, {0x0077, 0x0323}}, // 'ẉ' - {0x1E8A, {0x0058, 0x0307}}, // 'Ẋ' - {0x1E8B, {0x0078, 0x0307}}, // 'ẋ' - {0x1E8C, {0x0058, 0x0308}}, // 'Ẍ' - {0x1E8D, {0x0078, 0x0308}}, // 'ẍ' - {0x1E8E, {0x0059, 0x0307}}, // 'Ẏ' - {0x1E8F, {0x0079, 0x0307}}, // 'ẏ' - {0x1E90, {0x005A, 0x0302}}, // 'Ẑ' - {0x1E91, {0x007A, 0x0302}}, // 'ẑ' - {0x1E92, {0x005A, 0x0323}}, // 'Ẓ' - {0x1E93, {0x007A, 0x0323}}, // 'ẓ' - {0x1E94, {0x005A, 0x0331}}, // 'Ẕ' - {0x1E95, {0x007A, 0x0331}}, // 'ẕ' - {0x1E96, {0x0068, 0x0331}}, // 'ẖ' - {0x1E97, {0x0074, 0x0308}}, // 'ẗ' - {0x1E98, {0x0077, 0x030A}}, // 'ẘ' - {0x1E99, {0x0079, 0x030A}}, // 'ẙ' - {0x1E9B, {0x017F, 0x0307}}, // 'ẛ' - {0x1EA0, {0x0041, 0x0323}}, // 'Ạ' - {0x1EA1, {0x0061, 0x0323}}, // 'ạ' - {0x1EA2, {0x0041, 0x0309}}, // 'Ả' - {0x1EA3, {0x0061, 0x0309}}, // 'ả' - {0x1EA4, {0x0041, 0x0302, 0x0301}}, // 'Ấ' - {0x1EA5, {0x0061, 0x0302, 0x0301}}, // 'ấ' - {0x1EA6, {0x0041, 0x0302, 0x0300}}, // 'Ầ' - {0x1EA7, {0x0061, 0x0302, 0x0300}}, // 'ầ' - {0x1EA8, {0x0041, 0x0302, 0x0309}}, // 'Ẩ' - {0x1EA9, {0x0061, 0x0302, 0x0309}}, // 'ẩ' - {0x1EAA, {0x0041, 0x0302, 0x0303}}, // 'Ẫ' - {0x1EAB, {0x0061, 0x0302, 0x0303}}, // 'ẫ' - {0x1EAC, {0x0041, 0x0323, 0x0302}}, // 'Ậ' - {0x1EAD, {0x0061, 0x0323, 0x0302}}, // 'ậ' - {0x1EAE, {0x0041, 0x0306, 0x0301}}, // 'Ắ' - {0x1EAF, {0x0061, 0x0306, 0x0301}}, // 'ắ' - {0x1EB0, {0x0041, 0x0306, 0x0300}}, // 'Ằ' - {0x1EB1, {0x0061, 0x0306, 0x0300}}, // 'ằ' - {0x1EB2, {0x0041, 0x0306, 0x0309}}, // 'Ẳ' - {0x1EB3, {0x0061, 0x0306, 0x0309}}, // 'ẳ' - {0x1EB4, {0x0041, 0x0306, 0x0303}}, // 'Ẵ' - {0x1EB5, {0x0061, 0x0306, 0x0303}}, // 'ẵ' - {0x1EB6, {0x0041, 0x0323, 0x0306}}, // 'Ặ' - {0x1EB7, {0x0061, 0x0323, 0x0306}}, // 'ặ' - {0x1EB8, {0x0045, 0x0323}}, // 'Ẹ' - {0x1EB9, {0x0065, 0x0323}}, // 'ẹ' - {0x1EBA, {0x0045, 0x0309}}, // 'Ẻ' - {0x1EBB, {0x0065, 0x0309}}, // 'ẻ' - {0x1EBC, {0x0045, 0x0303}}, // 'Ẽ' - {0x1EBD, {0x0065, 0x0303}}, // 'ẽ' - {0x1EBE, {0x0045, 0x0302, 0x0301}}, // 'Ế' - {0x1EBF, {0x0065, 0x0302, 0x0301}}, // 'ế' - {0x1EC0, {0x0045, 0x0302, 0x0300}}, // 'Ề' - {0x1EC1, {0x0065, 0x0302, 0x0300}}, // 'ề' - {0x1EC2, {0x0045, 0x0302, 0x0309}}, // 'Ể' - {0x1EC3, {0x0065, 0x0302, 0x0309}}, // 'ể' - {0x1EC4, {0x0045, 0x0302, 0x0303}}, // 'Ễ' - {0x1EC5, {0x0065, 0x0302, 0x0303}}, // 'ễ' - {0x1EC6, {0x0045, 0x0323, 0x0302}}, // 'Ệ' - {0x1EC7, {0x0065, 0x0323, 0x0302}}, // 'ệ' - {0x1EC8, {0x0049, 0x0309}}, // 'Ỉ' - {0x1EC9, {0x0069, 0x0309}}, // 'ỉ' - {0x1ECA, {0x0049, 0x0323}}, // 'Ị' - {0x1ECB, {0x0069, 0x0323}}, // 'ị' - {0x1ECC, {0x004F, 0x0323}}, // 'Ọ' - {0x1ECD, {0x006F, 0x0323}}, // 'ọ' - {0x1ECE, {0x004F, 0x0309}}, // 'Ỏ' - {0x1ECF, {0x006F, 0x0309}}, // 'ỏ' - {0x1ED0, {0x004F, 0x0302, 0x0301}}, // 'Ố' - {0x1ED1, {0x006F, 0x0302, 0x0301}}, // 'ố' - {0x1ED2, {0x004F, 0x0302, 0x0300}}, // 'Ồ' - {0x1ED3, {0x006F, 0x0302, 0x0300}}, // 'ồ' - {0x1ED4, {0x004F, 0x0302, 0x0309}}, // 'Ổ' - {0x1ED5, {0x006F, 0x0302, 0x0309}}, // 'ổ' - {0x1ED6, {0x004F, 0x0302, 0x0303}}, // 'Ỗ' - {0x1ED7, {0x006F, 0x0302, 0x0303}}, // 'ỗ' - {0x1ED8, {0x004F, 0x0323, 0x0302}}, // 'Ộ' - {0x1ED9, {0x006F, 0x0323, 0x0302}}, // 'ộ' - {0x1EDA, {0x004F, 0x031B, 0x0301}}, // 'Ớ' - {0x1EDB, {0x006F, 0x031B, 0x0301}}, // 'ớ' - {0x1EDC, {0x004F, 0x031B, 0x0300}}, // 'Ờ' - {0x1EDD, {0x006F, 0x031B, 0x0300}}, // 'ờ' - {0x1EDE, {0x004F, 0x031B, 0x0309}}, // 'Ở' - {0x1EDF, {0x006F, 0x031B, 0x0309}}, // 'ở' - {0x1EE0, {0x004F, 0x031B, 0x0303}}, // 'Ỡ' - {0x1EE1, {0x006F, 0x031B, 0x0303}}, // 'ỡ' - {0x1EE2, {0x004F, 0x031B, 0x0323}}, // 'Ợ' - {0x1EE3, {0x006F, 0x031B, 0x0323}}, // 'ợ' - {0x1EE4, {0x0055, 0x0323}}, // 'Ụ' - {0x1EE5, {0x0075, 0x0323}}, // 'ụ' - {0x1EE6, {0x0055, 0x0309}}, // 'Ủ' - {0x1EE7, {0x0075, 0x0309}}, // 'ủ' - {0x1EE8, {0x0055, 0x031B, 0x0301}}, // 'Ứ' - {0x1EE9, {0x0075, 0x031B, 0x0301}}, // 'ứ' - {0x1EEA, {0x0055, 0x031B, 0x0300}}, // 'Ừ' - {0x1EEB, {0x0075, 0x031B, 0x0300}}, // 'ừ' - {0x1EEC, {0x0055, 0x031B, 0x0309}}, // 'Ử' - {0x1EED, {0x0075, 0x031B, 0x0309}}, // 'ử' - {0x1EEE, {0x0055, 0x031B, 0x0303}}, // 'Ữ' - {0x1EEF, {0x0075, 0x031B, 0x0303}}, // 'ữ' - {0x1EF0, {0x0055, 0x031B, 0x0323}}, // 'Ự' - {0x1EF1, {0x0075, 0x031B, 0x0323}}, // 'ự' - {0x1EF2, {0x0059, 0x0300}}, // 'Ỳ' - {0x1EF3, {0x0079, 0x0300}}, // 'ỳ' - {0x1EF4, {0x0059, 0x0323}}, // 'Ỵ' - {0x1EF5, {0x0079, 0x0323}}, // 'ỵ' - {0x1EF6, {0x0059, 0x0309}}, // 'Ỷ' - {0x1EF7, {0x0079, 0x0309}}, // 'ỷ' - {0x1EF8, {0x0059, 0x0303}}, // 'Ỹ' - {0x1EF9, {0x0079, 0x0303}}, // 'ỹ' - {0x1F00, {0x03B1, 0x0313}}, // 'ἀ' - {0x1F01, {0x03B1, 0x0314}}, // 'ἁ' - {0x1F02, {0x03B1, 0x0313, 0x0300}}, // 'ἂ' - {0x1F03, {0x03B1, 0x0314, 0x0300}}, // 'ἃ' - {0x1F04, {0x03B1, 0x0313, 0x0301}}, // 'ἄ' - {0x1F05, {0x03B1, 0x0314, 0x0301}}, // 'ἅ' - {0x1F06, {0x03B1, 0x0313, 0x0342}}, // 'ἆ' - {0x1F07, {0x03B1, 0x0314, 0x0342}}, // 'ἇ' - {0x1F08, {0x0391, 0x0313}}, // 'Ἀ' - {0x1F09, {0x0391, 0x0314}}, // 'Ἁ' - {0x1F0A, {0x0391, 0x0313, 0x0300}}, // 'Ἂ' - {0x1F0B, {0x0391, 0x0314, 0x0300}}, // 'Ἃ' - {0x1F0C, {0x0391, 0x0313, 0x0301}}, // 'Ἄ' - {0x1F0D, {0x0391, 0x0314, 0x0301}}, // 'Ἅ' - {0x1F0E, {0x0391, 0x0313, 0x0342}}, // 'Ἆ' - {0x1F0F, {0x0391, 0x0314, 0x0342}}, // 'Ἇ' - {0x1F10, {0x03B5, 0x0313}}, // 'ἐ' - {0x1F11, {0x03B5, 0x0314}}, // 'ἑ' - {0x1F12, {0x03B5, 0x0313, 0x0300}}, // 'ἒ' - {0x1F13, {0x03B5, 0x0314, 0x0300}}, // 'ἓ' - {0x1F14, {0x03B5, 0x0313, 0x0301}}, // 'ἔ' - {0x1F15, {0x03B5, 0x0314, 0x0301}}, // 'ἕ' - {0x1F18, {0x0395, 0x0313}}, // 'Ἐ' - {0x1F19, {0x0395, 0x0314}}, // 'Ἑ' - {0x1F1A, {0x0395, 0x0313, 0x0300}}, // 'Ἒ' - {0x1F1B, {0x0395, 0x0314, 0x0300}}, // 'Ἓ' - {0x1F1C, {0x0395, 0x0313, 0x0301}}, // 'Ἔ' - {0x1F1D, {0x0395, 0x0314, 0x0301}}, // 'Ἕ' - {0x1F20, {0x03B7, 0x0313}}, // 'ἠ' - {0x1F21, {0x03B7, 0x0314}}, // 'ἡ' - {0x1F22, {0x03B7, 0x0313, 0x0300}}, // 'ἢ' - {0x1F23, {0x03B7, 0x0314, 0x0300}}, // 'ἣ' - {0x1F24, {0x03B7, 0x0313, 0x0301}}, // 'ἤ' - {0x1F25, {0x03B7, 0x0314, 0x0301}}, // 'ἥ' - {0x1F26, {0x03B7, 0x0313, 0x0342}}, // 'ἦ' - {0x1F27, {0x03B7, 0x0314, 0x0342}}, // 'ἧ' - {0x1F28, {0x0397, 0x0313}}, // 'Ἠ' - {0x1F29, {0x0397, 0x0314}}, // 'Ἡ' - {0x1F2A, {0x0397, 0x0313, 0x0300}}, // 'Ἢ' - {0x1F2B, {0x0397, 0x0314, 0x0300}}, // 'Ἣ' - {0x1F2C, {0x0397, 0x0313, 0x0301}}, // 'Ἤ' - {0x1F2D, {0x0397, 0x0314, 0x0301}}, // 'Ἥ' - {0x1F2E, {0x0397, 0x0313, 0x0342}}, // 'Ἦ' - {0x1F2F, {0x0397, 0x0314, 0x0342}}, // 'Ἧ' - {0x1F30, {0x03B9, 0x0313}}, // 'ἰ' - {0x1F31, {0x03B9, 0x0314}}, // 'ἱ' - {0x1F32, {0x03B9, 0x0313, 0x0300}}, // 'ἲ' - {0x1F33, {0x03B9, 0x0314, 0x0300}}, // 'ἳ' - {0x1F34, {0x03B9, 0x0313, 0x0301}}, // 'ἴ' - {0x1F35, {0x03B9, 0x0314, 0x0301}}, // 'ἵ' - {0x1F36, {0x03B9, 0x0313, 0x0342}}, // 'ἶ' - {0x1F37, {0x03B9, 0x0314, 0x0342}}, // 'ἷ' - {0x1F38, {0x0399, 0x0313}}, // 'Ἰ' - {0x1F39, {0x0399, 0x0314}}, // 'Ἱ' - {0x1F3A, {0x0399, 0x0313, 0x0300}}, // 'Ἲ' - {0x1F3B, {0x0399, 0x0314, 0x0300}}, // 'Ἳ' - {0x1F3C, {0x0399, 0x0313, 0x0301}}, // 'Ἴ' - {0x1F3D, {0x0399, 0x0314, 0x0301}}, // 'Ἵ' - {0x1F3E, {0x0399, 0x0313, 0x0342}}, // 'Ἶ' - {0x1F3F, {0x0399, 0x0314, 0x0342}}, // 'Ἷ' - {0x1F40, {0x03BF, 0x0313}}, // 'ὀ' - {0x1F41, {0x03BF, 0x0314}}, // 'ὁ' - {0x1F42, {0x03BF, 0x0313, 0x0300}}, // 'ὂ' - {0x1F43, {0x03BF, 0x0314, 0x0300}}, // 'ὃ' - {0x1F44, {0x03BF, 0x0313, 0x0301}}, // 'ὄ' - {0x1F45, {0x03BF, 0x0314, 0x0301}}, // 'ὅ' - {0x1F48, {0x039F, 0x0313}}, // 'Ὀ' - {0x1F49, {0x039F, 0x0314}}, // 'Ὁ' - {0x1F4A, {0x039F, 0x0313, 0x0300}}, // 'Ὂ' - {0x1F4B, {0x039F, 0x0314, 0x0300}}, // 'Ὃ' - {0x1F4C, {0x039F, 0x0313, 0x0301}}, // 'Ὄ' - {0x1F4D, {0x039F, 0x0314, 0x0301}}, // 'Ὅ' - {0x1F50, {0x03C5, 0x0313}}, // 'ὐ' - {0x1F51, {0x03C5, 0x0314}}, // 'ὑ' - {0x1F52, {0x03C5, 0x0313, 0x0300}}, // 'ὒ' - {0x1F53, {0x03C5, 0x0314, 0x0300}}, // 'ὓ' - {0x1F54, {0x03C5, 0x0313, 0x0301}}, // 'ὔ' - {0x1F55, {0x03C5, 0x0314, 0x0301}}, // 'ὕ' - {0x1F56, {0x03C5, 0x0313, 0x0342}}, // 'ὖ' - {0x1F57, {0x03C5, 0x0314, 0x0342}}, // 'ὗ' - {0x1F59, {0x03A5, 0x0314}}, // 'Ὑ' - {0x1F5B, {0x03A5, 0x0314, 0x0300}}, // 'Ὓ' - {0x1F5D, {0x03A5, 0x0314, 0x0301}}, // 'Ὕ' - {0x1F5F, {0x03A5, 0x0314, 0x0342}}, // 'Ὗ' - {0x1F60, {0x03C9, 0x0313}}, // 'ὠ' - {0x1F61, {0x03C9, 0x0314}}, // 'ὡ' - {0x1F62, {0x03C9, 0x0313, 0x0300}}, // 'ὢ' - {0x1F63, {0x03C9, 0x0314, 0x0300}}, // 'ὣ' - {0x1F64, {0x03C9, 0x0313, 0x0301}}, // 'ὤ' - {0x1F65, {0x03C9, 0x0314, 0x0301}}, // 'ὥ' - {0x1F66, {0x03C9, 0x0313, 0x0342}}, // 'ὦ' - {0x1F67, {0x03C9, 0x0314, 0x0342}}, // 'ὧ' - {0x1F68, {0x03A9, 0x0313}}, // 'Ὠ' - {0x1F69, {0x03A9, 0x0314}}, // 'Ὡ' - {0x1F6A, {0x03A9, 0x0313, 0x0300}}, // 'Ὢ' - {0x1F6B, {0x03A9, 0x0314, 0x0300}}, // 'Ὣ' - {0x1F6C, {0x03A9, 0x0313, 0x0301}}, // 'Ὤ' - {0x1F6D, {0x03A9, 0x0314, 0x0301}}, // 'Ὥ' - {0x1F6E, {0x03A9, 0x0313, 0x0342}}, // 'Ὦ' - {0x1F6F, {0x03A9, 0x0314, 0x0342}}, // 'Ὧ' - {0x1F70, {0x03B1, 0x0300}}, // 'ὰ' - {0x1F71, {0x03B1, 0x0301}}, // 'ά' - {0x1F72, {0x03B5, 0x0300}}, // 'ὲ' - {0x1F73, {0x03B5, 0x0301}}, // 'έ' - {0x1F74, {0x03B7, 0x0300}}, // 'ὴ' - {0x1F75, {0x03B7, 0x0301}}, // 'ή' - {0x1F76, {0x03B9, 0x0300}}, // 'ὶ' - {0x1F77, {0x03B9, 0x0301}}, // 'ί' - {0x1F78, {0x03BF, 0x0300}}, // 'ὸ' - {0x1F79, {0x03BF, 0x0301}}, // 'ό' - {0x1F7A, {0x03C5, 0x0300}}, // 'ὺ' - {0x1F7B, {0x03C5, 0x0301}}, // 'ύ' - {0x1F7C, {0x03C9, 0x0300}}, // 'ὼ' - {0x1F7D, {0x03C9, 0x0301}}, // 'ώ' - {0x1F80, {0x03B1, 0x0313, 0x0345}}, // 'ᾀ' - {0x1F81, {0x03B1, 0x0314, 0x0345}}, // 'ᾁ' - {0x1F82, {0x03B1, 0x0313, 0x0300, 0x0345}}, // 'ᾂ' - {0x1F83, {0x03B1, 0x0314, 0x0300, 0x0345}}, // 'ᾃ' - {0x1F84, {0x03B1, 0x0313, 0x0301, 0x0345}}, // 'ᾄ' - {0x1F85, {0x03B1, 0x0314, 0x0301, 0x0345}}, // 'ᾅ' - {0x1F86, {0x03B1, 0x0313, 0x0342, 0x0345}}, // 'ᾆ' - {0x1F87, {0x03B1, 0x0314, 0x0342, 0x0345}}, // 'ᾇ' - {0x1F88, {0x0391, 0x0313, 0x0345}}, // 'ᾈ' - {0x1F89, {0x0391, 0x0314, 0x0345}}, // 'ᾉ' - {0x1F8A, {0x0391, 0x0313, 0x0300, 0x0345}}, // 'ᾊ' - {0x1F8B, {0x0391, 0x0314, 0x0300, 0x0345}}, // 'ᾋ' - {0x1F8C, {0x0391, 0x0313, 0x0301, 0x0345}}, // 'ᾌ' - {0x1F8D, {0x0391, 0x0314, 0x0301, 0x0345}}, // 'ᾍ' - {0x1F8E, {0x0391, 0x0313, 0x0342, 0x0345}}, // 'ᾎ' - {0x1F8F, {0x0391, 0x0314, 0x0342, 0x0345}}, // 'ᾏ' - {0x1F90, {0x03B7, 0x0313, 0x0345}}, // 'ᾐ' - {0x1F91, {0x03B7, 0x0314, 0x0345}}, // 'ᾑ' - {0x1F92, {0x03B7, 0x0313, 0x0300, 0x0345}}, // 'ᾒ' - {0x1F93, {0x03B7, 0x0314, 0x0300, 0x0345}}, // 'ᾓ' - {0x1F94, {0x03B7, 0x0313, 0x0301, 0x0345}}, // 'ᾔ' - {0x1F95, {0x03B7, 0x0314, 0x0301, 0x0345}}, // 'ᾕ' - {0x1F96, {0x03B7, 0x0313, 0x0342, 0x0345}}, // 'ᾖ' - {0x1F97, {0x03B7, 0x0314, 0x0342, 0x0345}}, // 'ᾗ' - {0x1F98, {0x0397, 0x0313, 0x0345}}, // 'ᾘ' - {0x1F99, {0x0397, 0x0314, 0x0345}}, // 'ᾙ' - {0x1F9A, {0x0397, 0x0313, 0x0300, 0x0345}}, // 'ᾚ' - {0x1F9B, {0x0397, 0x0314, 0x0300, 0x0345}}, // 'ᾛ' - {0x1F9C, {0x0397, 0x0313, 0x0301, 0x0345}}, // 'ᾜ' - {0x1F9D, {0x0397, 0x0314, 0x0301, 0x0345}}, // 'ᾝ' - {0x1F9E, {0x0397, 0x0313, 0x0342, 0x0345}}, // 'ᾞ' - {0x1F9F, {0x0397, 0x0314, 0x0342, 0x0345}}, // 'ᾟ' - {0x1FA0, {0x03C9, 0x0313, 0x0345}}, // 'ᾠ' - {0x1FA1, {0x03C9, 0x0314, 0x0345}}, // 'ᾡ' - {0x1FA2, {0x03C9, 0x0313, 0x0300, 0x0345}}, // 'ᾢ' - {0x1FA3, {0x03C9, 0x0314, 0x0300, 0x0345}}, // 'ᾣ' - {0x1FA4, {0x03C9, 0x0313, 0x0301, 0x0345}}, // 'ᾤ' - {0x1FA5, {0x03C9, 0x0314, 0x0301, 0x0345}}, // 'ᾥ' - {0x1FA6, {0x03C9, 0x0313, 0x0342, 0x0345}}, // 'ᾦ' - {0x1FA7, {0x03C9, 0x0314, 0x0342, 0x0345}}, // 'ᾧ' - {0x1FA8, {0x03A9, 0x0313, 0x0345}}, // 'ᾨ' - {0x1FA9, {0x03A9, 0x0314, 0x0345}}, // 'ᾩ' - {0x1FAA, {0x03A9, 0x0313, 0x0300, 0x0345}}, // 'ᾪ' - {0x1FAB, {0x03A9, 0x0314, 0x0300, 0x0345}}, // 'ᾫ' - {0x1FAC, {0x03A9, 0x0313, 0x0301, 0x0345}}, // 'ᾬ' - {0x1FAD, {0x03A9, 0x0314, 0x0301, 0x0345}}, // 'ᾭ' - {0x1FAE, {0x03A9, 0x0313, 0x0342, 0x0345}}, // 'ᾮ' - {0x1FAF, {0x03A9, 0x0314, 0x0342, 0x0345}}, // 'ᾯ' - {0x1FB0, {0x03B1, 0x0306}}, // 'ᾰ' - {0x1FB1, {0x03B1, 0x0304}}, // 'ᾱ' - {0x1FB2, {0x03B1, 0x0300, 0x0345}}, // 'ᾲ' - {0x1FB3, {0x03B1, 0x0345}}, // 'ᾳ' - {0x1FB4, {0x03B1, 0x0301, 0x0345}}, // 'ᾴ' - {0x1FB6, {0x03B1, 0x0342}}, // 'ᾶ' - {0x1FB7, {0x03B1, 0x0342, 0x0345}}, // 'ᾷ' - {0x1FB8, {0x0391, 0x0306}}, // 'Ᾰ' - {0x1FB9, {0x0391, 0x0304}}, // 'Ᾱ' - {0x1FBA, {0x0391, 0x0300}}, // 'Ὰ' - {0x1FBB, {0x0391, 0x0301}}, // 'Ά' - {0x1FBC, {0x0391, 0x0345}}, // 'ᾼ' - {0x1FBE, {0x03B9}}, // 'ι' - {0x1FC1, {0x00A8, 0x0342}}, // '῁' - {0x1FC2, {0x03B7, 0x0300, 0x0345}}, // 'ῂ' - {0x1FC3, {0x03B7, 0x0345}}, // 'ῃ' - {0x1FC4, {0x03B7, 0x0301, 0x0345}}, // 'ῄ' - {0x1FC6, {0x03B7, 0x0342}}, // 'ῆ' - {0x1FC7, {0x03B7, 0x0342, 0x0345}}, // 'ῇ' - {0x1FC8, {0x0395, 0x0300}}, // 'Ὲ' - {0x1FC9, {0x0395, 0x0301}}, // 'Έ' - {0x1FCA, {0x0397, 0x0300}}, // 'Ὴ' - {0x1FCB, {0x0397, 0x0301}}, // 'Ή' - {0x1FCC, {0x0397, 0x0345}}, // 'ῌ' - {0x1FCD, {0x1FBF, 0x0300}}, // '῍' - {0x1FCE, {0x1FBF, 0x0301}}, // '῎' - {0x1FCF, {0x1FBF, 0x0342}}, // '῏' - {0x1FD0, {0x03B9, 0x0306}}, // 'ῐ' - {0x1FD1, {0x03B9, 0x0304}}, // 'ῑ' - {0x1FD2, {0x03B9, 0x0308, 0x0300}}, // 'ῒ' - {0x1FD3, {0x03B9, 0x0308, 0x0301}}, // 'ΐ' - {0x1FD6, {0x03B9, 0x0342}}, // 'ῖ' - {0x1FD7, {0x03B9, 0x0308, 0x0342}}, // 'ῗ' - {0x1FD8, {0x0399, 0x0306}}, // 'Ῐ' - {0x1FD9, {0x0399, 0x0304}}, // 'Ῑ' - {0x1FDA, {0x0399, 0x0300}}, // 'Ὶ' - {0x1FDB, {0x0399, 0x0301}}, // 'Ί' - {0x1FDD, {0x1FFE, 0x0300}}, // '῝' - {0x1FDE, {0x1FFE, 0x0301}}, // '῞' - {0x1FDF, {0x1FFE, 0x0342}}, // '῟' - {0x1FE0, {0x03C5, 0x0306}}, // 'ῠ' - {0x1FE1, {0x03C5, 0x0304}}, // 'ῡ' - {0x1FE2, {0x03C5, 0x0308, 0x0300}}, // 'ῢ' - {0x1FE3, {0x03C5, 0x0308, 0x0301}}, // 'ΰ' - {0x1FE4, {0x03C1, 0x0313}}, // 'ῤ' - {0x1FE5, {0x03C1, 0x0314}}, // 'ῥ' - {0x1FE6, {0x03C5, 0x0342}}, // 'ῦ' - {0x1FE7, {0x03C5, 0x0308, 0x0342}}, // 'ῧ' - {0x1FE8, {0x03A5, 0x0306}}, // 'Ῠ' - {0x1FE9, {0x03A5, 0x0304}}, // 'Ῡ' - {0x1FEA, {0x03A5, 0x0300}}, // 'Ὺ' - {0x1FEB, {0x03A5, 0x0301}}, // 'Ύ' - {0x1FEC, {0x03A1, 0x0314}}, // 'Ῥ' - {0x1FED, {0x00A8, 0x0300}}, // '῭' - {0x1FEE, {0x00A8, 0x0301}}, // '΅' - {0x1FEF, {0x0060}}, // '`' - {0x1FF2, {0x03C9, 0x0300, 0x0345}}, // 'ῲ' - {0x1FF3, {0x03C9, 0x0345}}, // 'ῳ' - {0x1FF4, {0x03C9, 0x0301, 0x0345}}, // 'ῴ' - {0x1FF6, {0x03C9, 0x0342}}, // 'ῶ' - {0x1FF7, {0x03C9, 0x0342, 0x0345}}, // 'ῷ' - {0x1FF8, {0x039F, 0x0300}}, // 'Ὸ' - {0x1FF9, {0x039F, 0x0301}}, // 'Ό' - {0x1FFA, {0x03A9, 0x0300}}, // 'Ὼ' - {0x1FFB, {0x03A9, 0x0301}}, // 'Ώ' - {0x1FFC, {0x03A9, 0x0345}}, // 'ῼ' - {0x1FFD, {0x00B4}}, // '´' - {0x2000, {0x2002}}, - {0x2001, {0x2003}}, - {0x2126, {0x03A9}}, // 'Ω' - {0x212A, {0x004B}}, // 'K' - {0x212B, {0x0041, 0x030A}}, // 'Å' - {0x219A, {0x2190, 0x0338}}, // '↚' - {0x219B, {0x2192, 0x0338}}, // '↛' - {0x21AE, {0x2194, 0x0338}}, // '↮' - {0x21CD, {0x21D0, 0x0338}}, // '⇍' - {0x21CE, {0x21D4, 0x0338}}, // '⇎' - {0x21CF, {0x21D2, 0x0338}}, // '⇏' - {0x2204, {0x2203, 0x0338}}, // '∄' - {0x2209, {0x2208, 0x0338}}, // '∉' - {0x220C, {0x220B, 0x0338}}, // '∌' - {0x2224, {0x2223, 0x0338}}, // '∤' - {0x2226, {0x2225, 0x0338}}, // '∦' - {0x2241, {0x223C, 0x0338}}, // '≁' - {0x2244, {0x2243, 0x0338}}, // '≄' - {0x2247, {0x2245, 0x0338}}, // '≇' - {0x2249, {0x2248, 0x0338}}, // '≉' - {0x2260, {0x003D, 0x0338}}, // '≠' - {0x2262, {0x2261, 0x0338}}, // '≢' - {0x226D, {0x224D, 0x0338}}, // '≭' - {0x226E, {0x003C, 0x0338}}, // '≮' - {0x226F, {0x003E, 0x0338}}, // '≯' - {0x2270, {0x2264, 0x0338}}, // '≰' - {0x2271, {0x2265, 0x0338}}, // '≱' - {0x2274, {0x2272, 0x0338}}, // '≴' - {0x2275, {0x2273, 0x0338}}, // '≵' - {0x2278, {0x2276, 0x0338}}, // '≸' - {0x2279, {0x2277, 0x0338}}, // '≹' - {0x2280, {0x227A, 0x0338}}, // '⊀' - {0x2281, {0x227B, 0x0338}}, // '⊁' - {0x2284, {0x2282, 0x0338}}, // '⊄' - {0x2285, {0x2283, 0x0338}}, // '⊅' - {0x2288, {0x2286, 0x0338}}, // '⊈' - {0x2289, {0x2287, 0x0338}}, // '⊉' - {0x22AC, {0x22A2, 0x0338}}, // '⊬' - {0x22AD, {0x22A8, 0x0338}}, // '⊭' - {0x22AE, {0x22A9, 0x0338}}, // '⊮' - {0x22AF, {0x22AB, 0x0338}}, // '⊯' - {0x22E0, {0x227C, 0x0338}}, // '⋠' - {0x22E1, {0x227D, 0x0338}}, // '⋡' - {0x22E2, {0x2291, 0x0338}}, // '⋢' - {0x22E3, {0x2292, 0x0338}}, // '⋣' - {0x22EA, {0x22B2, 0x0338}}, // '⋪' - {0x22EB, {0x22B3, 0x0338}}, // '⋫' - {0x22EC, {0x22B4, 0x0338}}, // '⋬' - {0x22ED, {0x22B5, 0x0338}}, // '⋭' - {0x2329, {0x3008}}, // '〈' - {0x232A, {0x3009}}, // '〉' - {0x2ADC, {0x2ADD, 0x0338}}, // '⫝̸' - {0x304C, {0x304B, 0x3099}}, // 'が' - {0x304E, {0x304D, 0x3099}}, // 'ぎ' - {0x3050, {0x304F, 0x3099}}, // 'ぐ' - {0x3052, {0x3051, 0x3099}}, // 'げ' - {0x3054, {0x3053, 0x3099}}, // 'ご' - {0x3056, {0x3055, 0x3099}}, // 'ざ' - {0x3058, {0x3057, 0x3099}}, // 'じ' - {0x305A, {0x3059, 0x3099}}, // 'ず' - {0x305C, {0x305B, 0x3099}}, // 'ぜ' - {0x305E, {0x305D, 0x3099}}, // 'ぞ' - {0x3060, {0x305F, 0x3099}}, // 'だ' - {0x3062, {0x3061, 0x3099}}, // 'ぢ' - {0x3065, {0x3064, 0x3099}}, // 'づ' - {0x3067, {0x3066, 0x3099}}, // 'で' - {0x3069, {0x3068, 0x3099}}, // 'ど' - {0x3070, {0x306F, 0x3099}}, // 'ば' - {0x3071, {0x306F, 0x309A}}, // 'ぱ' - {0x3073, {0x3072, 0x3099}}, // 'び' - {0x3074, {0x3072, 0x309A}}, // 'ぴ' - {0x3076, {0x3075, 0x3099}}, // 'ぶ' - {0x3077, {0x3075, 0x309A}}, // 'ぷ' - {0x3079, {0x3078, 0x3099}}, // 'べ' - {0x307A, {0x3078, 0x309A}}, // 'ぺ' - {0x307C, {0x307B, 0x3099}}, // 'ぼ' - {0x307D, {0x307B, 0x309A}}, // 'ぽ' - {0x3094, {0x3046, 0x3099}}, // 'ゔ' - {0x309E, {0x309D, 0x3099}}, // 'ゞ' - {0x30AC, {0x30AB, 0x3099}}, // 'ガ' - {0x30AE, {0x30AD, 0x3099}}, // 'ギ' - {0x30B0, {0x30AF, 0x3099}}, // 'グ' - {0x30B2, {0x30B1, 0x3099}}, // 'ゲ' - {0x30B4, {0x30B3, 0x3099}}, // 'ゴ' - {0x30B6, {0x30B5, 0x3099}}, // 'ザ' - {0x30B8, {0x30B7, 0x3099}}, // 'ジ' - {0x30BA, {0x30B9, 0x3099}}, // 'ズ' - {0x30BC, {0x30BB, 0x3099}}, // 'ゼ' - {0x30BE, {0x30BD, 0x3099}}, // 'ゾ' - {0x30C0, {0x30BF, 0x3099}}, // 'ダ' - {0x30C2, {0x30C1, 0x3099}}, // 'ヂ' - {0x30C5, {0x30C4, 0x3099}}, // 'ヅ' - {0x30C7, {0x30C6, 0x3099}}, // 'デ' - {0x30C9, {0x30C8, 0x3099}}, // 'ド' - {0x30D0, {0x30CF, 0x3099}}, // 'バ' - {0x30D1, {0x30CF, 0x309A}}, // 'パ' - {0x30D3, {0x30D2, 0x3099}}, // 'ビ' - {0x30D4, {0x30D2, 0x309A}}, // 'ピ' - {0x30D6, {0x30D5, 0x3099}}, // 'ブ' - {0x30D7, {0x30D5, 0x309A}}, // 'プ' - {0x30D9, {0x30D8, 0x3099}}, // 'ベ' - {0x30DA, {0x30D8, 0x309A}}, // 'ペ' - {0x30DC, {0x30DB, 0x3099}}, // 'ボ' - {0x30DD, {0x30DB, 0x309A}}, // 'ポ' - {0x30F4, {0x30A6, 0x3099}}, // 'ヴ' - {0x30F7, {0x30EF, 0x3099}}, // 'ヷ' - {0x30F8, {0x30F0, 0x3099}}, // 'ヸ' - {0x30F9, {0x30F1, 0x3099}}, // 'ヹ' - {0x30FA, {0x30F2, 0x3099}}, // 'ヺ' - {0x30FE, {0x30FD, 0x3099}}, // 'ヾ' - {0xF900, {0x8C48}}, // '豈' - {0xF901, {0x66F4}}, // '更' - {0xF902, {0x8ECA}}, // '車' - {0xF903, {0x8CC8}}, // '賈' - {0xF904, {0x6ED1}}, // '滑' - {0xF905, {0x4E32}}, // '串' - {0xF906, {0x53E5}}, // '句' - {0xF907, {0x9F9C}}, // '龜' - {0xF908, {0x9F9C}}, // '龜' - {0xF909, {0x5951}}, // '契' - {0xF90A, {0x91D1}}, // '金' - {0xF90B, {0x5587}}, // '喇' - {0xF90C, {0x5948}}, // '奈' - {0xF90D, {0x61F6}}, // '懶' - {0xF90E, {0x7669}}, // '癩' - {0xF90F, {0x7F85}}, // '羅' - {0xF910, {0x863F}}, // '蘿' - {0xF911, {0x87BA}}, // '螺' - {0xF912, {0x88F8}}, // '裸' - {0xF913, {0x908F}}, // '邏' - {0xF914, {0x6A02}}, // '樂' - {0xF915, {0x6D1B}}, // '洛' - {0xF916, {0x70D9}}, // '烙' - {0xF917, {0x73DE}}, // '珞' - {0xF918, {0x843D}}, // '落' - {0xF919, {0x916A}}, // '酪' - {0xF91A, {0x99F1}}, // '駱' - {0xF91B, {0x4E82}}, // '亂' - {0xF91C, {0x5375}}, // '卵' - {0xF91D, {0x6B04}}, // '欄' - {0xF91E, {0x721B}}, // '爛' - {0xF91F, {0x862D}}, // '蘭' - {0xF920, {0x9E1E}}, // '鸞' - {0xF921, {0x5D50}}, // '嵐' - {0xF922, {0x6FEB}}, // '濫' - {0xF923, {0x85CD}}, // '藍' - {0xF924, {0x8964}}, // '襤' - {0xF925, {0x62C9}}, // '拉' - {0xF926, {0x81D8}}, // '臘' - {0xF927, {0x881F}}, // '蠟' - {0xF928, {0x5ECA}}, // '廊' - {0xF929, {0x6717}}, // '朗' - {0xF92A, {0x6D6A}}, // '浪' - {0xF92B, {0x72FC}}, // '狼' - {0xF92C, {0x90CE}}, // '郎' - {0xF92D, {0x4F86}}, // '來' - {0xF92E, {0x51B7}}, // '冷' - {0xF92F, {0x52DE}}, // '勞' - {0xF930, {0x64C4}}, // '擄' - {0xF931, {0x6AD3}}, // '櫓' - {0xF932, {0x7210}}, // '爐' - {0xF933, {0x76E7}}, // '盧' - {0xF934, {0x8001}}, // '老' - {0xF935, {0x8606}}, // '蘆' - {0xF936, {0x865C}}, // '虜' - {0xF937, {0x8DEF}}, // '路' - {0xF938, {0x9732}}, // '露' - {0xF939, {0x9B6F}}, // '魯' - {0xF93A, {0x9DFA}}, // '鷺' - {0xF93B, {0x788C}}, // '碌' - {0xF93C, {0x797F}}, // '祿' - {0xF93D, {0x7DA0}}, // '綠' - {0xF93E, {0x83C9}}, // '菉' - {0xF93F, {0x9304}}, // '錄' - {0xF940, {0x9E7F}}, // '鹿' - {0xF941, {0x8AD6}}, // '論' - {0xF942, {0x58DF}}, // '壟' - {0xF943, {0x5F04}}, // '弄' - {0xF944, {0x7C60}}, // '籠' - {0xF945, {0x807E}}, // '聾' - {0xF946, {0x7262}}, // '牢' - {0xF947, {0x78CA}}, // '磊' - {0xF948, {0x8CC2}}, // '賂' - {0xF949, {0x96F7}}, // '雷' - {0xF94A, {0x58D8}}, // '壘' - {0xF94B, {0x5C62}}, // '屢' - {0xF94C, {0x6A13}}, // '樓' - {0xF94D, {0x6DDA}}, // '淚' - {0xF94E, {0x6F0F}}, // '漏' - {0xF94F, {0x7D2F}}, // '累' - {0xF950, {0x7E37}}, // '縷' - {0xF951, {0x964B}}, // '陋' - {0xF952, {0x52D2}}, // '勒' - {0xF953, {0x808B}}, // '肋' - {0xF954, {0x51DC}}, // '凜' - {0xF955, {0x51CC}}, // '凌' - {0xF956, {0x7A1C}}, // '稜' - {0xF957, {0x7DBE}}, // '綾' - {0xF958, {0x83F1}}, // '菱' - {0xF959, {0x9675}}, // '陵' - {0xF95A, {0x8B80}}, // '讀' - {0xF95B, {0x62CF}}, // '拏' - {0xF95C, {0x6A02}}, // '樂' - {0xF95D, {0x8AFE}}, // '諾' - {0xF95E, {0x4E39}}, // '丹' - {0xF95F, {0x5BE7}}, // '寧' - {0xF960, {0x6012}}, // '怒' - {0xF961, {0x7387}}, // '率' - {0xF962, {0x7570}}, // '異' - {0xF963, {0x5317}}, // '北' - {0xF964, {0x78FB}}, // '磻' - {0xF965, {0x4FBF}}, // '便' - {0xF966, {0x5FA9}}, // '復' - {0xF967, {0x4E0D}}, // '不' - {0xF968, {0x6CCC}}, // '泌' - {0xF969, {0x6578}}, // '數' - {0xF96A, {0x7D22}}, // '索' - {0xF96B, {0x53C3}}, // '參' - {0xF96C, {0x585E}}, // '塞' - {0xF96D, {0x7701}}, // '省' - {0xF96E, {0x8449}}, // '葉' - {0xF96F, {0x8AAA}}, // '說' - {0xF970, {0x6BBA}}, // '殺' - {0xF971, {0x8FB0}}, // '辰' - {0xF972, {0x6C88}}, // '沈' - {0xF973, {0x62FE}}, // '拾' - {0xF974, {0x82E5}}, // '若' - {0xF975, {0x63A0}}, // '掠' - {0xF976, {0x7565}}, // '略' - {0xF977, {0x4EAE}}, // '亮' - {0xF978, {0x5169}}, // '兩' - {0xF979, {0x51C9}}, // '凉' - {0xF97A, {0x6881}}, // '梁' - {0xF97B, {0x7CE7}}, // '糧' - {0xF97C, {0x826F}}, // '良' - {0xF97D, {0x8AD2}}, // '諒' - {0xF97E, {0x91CF}}, // '量' - {0xF97F, {0x52F5}}, // '勵' - {0xF980, {0x5442}}, // '呂' - {0xF981, {0x5973}}, // '女' - {0xF982, {0x5EEC}}, // '廬' - {0xF983, {0x65C5}}, // '旅' - {0xF984, {0x6FFE}}, // '濾' - {0xF985, {0x792A}}, // '礪' - {0xF986, {0x95AD}}, // '閭' - {0xF987, {0x9A6A}}, // '驪' - {0xF988, {0x9E97}}, // '麗' - {0xF989, {0x9ECE}}, // '黎' - {0xF98A, {0x529B}}, // '力' - {0xF98B, {0x66C6}}, // '曆' - {0xF98C, {0x6B77}}, // '歷' - {0xF98D, {0x8F62}}, // '轢' - {0xF98E, {0x5E74}}, // '年' - {0xF98F, {0x6190}}, // '憐' - {0xF990, {0x6200}}, // '戀' - {0xF991, {0x649A}}, // '撚' - {0xF992, {0x6F23}}, // '漣' - {0xF993, {0x7149}}, // '煉' - {0xF994, {0x7489}}, // '璉' - {0xF995, {0x79CA}}, // '秊' - {0xF996, {0x7DF4}}, // '練' - {0xF997, {0x806F}}, // '聯' - {0xF998, {0x8F26}}, // '輦' - {0xF999, {0x84EE}}, // '蓮' - {0xF99A, {0x9023}}, // '連' - {0xF99B, {0x934A}}, // '鍊' - {0xF99C, {0x5217}}, // '列' - {0xF99D, {0x52A3}}, // '劣' - {0xF99E, {0x54BD}}, // '咽' - {0xF99F, {0x70C8}}, // '烈' - {0xF9A0, {0x88C2}}, // '裂' - {0xF9A1, {0x8AAA}}, // '說' - {0xF9A2, {0x5EC9}}, // '廉' - {0xF9A3, {0x5FF5}}, // '念' - {0xF9A4, {0x637B}}, // '捻' - {0xF9A5, {0x6BAE}}, // '殮' - {0xF9A6, {0x7C3E}}, // '簾' - {0xF9A7, {0x7375}}, // '獵' - {0xF9A8, {0x4EE4}}, // '令' - {0xF9A9, {0x56F9}}, // '囹' - {0xF9AA, {0x5BE7}}, // '寧' - {0xF9AB, {0x5DBA}}, // '嶺' - {0xF9AC, {0x601C}}, // '怜' - {0xF9AD, {0x73B2}}, // '玲' - {0xF9AE, {0x7469}}, // '瑩' - {0xF9AF, {0x7F9A}}, // '羚' - {0xF9B0, {0x8046}}, // '聆' - {0xF9B1, {0x9234}}, // '鈴' - {0xF9B2, {0x96F6}}, // '零' - {0xF9B3, {0x9748}}, // '靈' - {0xF9B4, {0x9818}}, // '領' - {0xF9B5, {0x4F8B}}, // '例' - {0xF9B6, {0x79AE}}, // '禮' - {0xF9B7, {0x91B4}}, // '醴' - {0xF9B8, {0x96B8}}, // '隸' - {0xF9B9, {0x60E1}}, // '惡' - {0xF9BA, {0x4E86}}, // '了' - {0xF9BB, {0x50DA}}, // '僚' - {0xF9BC, {0x5BEE}}, // '寮' - {0xF9BD, {0x5C3F}}, // '尿' - {0xF9BE, {0x6599}}, // '料' - {0xF9BF, {0x6A02}}, // '樂' - {0xF9C0, {0x71CE}}, // '燎' - {0xF9C1, {0x7642}}, // '療' - {0xF9C2, {0x84FC}}, // '蓼' - {0xF9C3, {0x907C}}, // '遼' - {0xF9C4, {0x9F8D}}, // '龍' - {0xF9C5, {0x6688}}, // '暈' - {0xF9C6, {0x962E}}, // '阮' - {0xF9C7, {0x5289}}, // '劉' - {0xF9C8, {0x677B}}, // '杻' - {0xF9C9, {0x67F3}}, // '柳' - {0xF9CA, {0x6D41}}, // '流' - {0xF9CB, {0x6E9C}}, // '溜' - {0xF9CC, {0x7409}}, // '琉' - {0xF9CD, {0x7559}}, // '留' - {0xF9CE, {0x786B}}, // '硫' - {0xF9CF, {0x7D10}}, // '紐' - {0xF9D0, {0x985E}}, // '類' - {0xF9D1, {0x516D}}, // '六' - {0xF9D2, {0x622E}}, // '戮' - {0xF9D3, {0x9678}}, // '陸' - {0xF9D4, {0x502B}}, // '倫' - {0xF9D5, {0x5D19}}, // '崙' - {0xF9D6, {0x6DEA}}, // '淪' - {0xF9D7, {0x8F2A}}, // '輪' - {0xF9D8, {0x5F8B}}, // '律' - {0xF9D9, {0x6144}}, // '慄' - {0xF9DA, {0x6817}}, // '栗' - {0xF9DB, {0x7387}}, // '率' - {0xF9DC, {0x9686}}, // '隆' - {0xF9DD, {0x5229}}, // '利' - {0xF9DE, {0x540F}}, // '吏' - {0xF9DF, {0x5C65}}, // '履' - {0xF9E0, {0x6613}}, // '易' - {0xF9E1, {0x674E}}, // '李' - {0xF9E2, {0x68A8}}, // '梨' - {0xF9E3, {0x6CE5}}, // '泥' - {0xF9E4, {0x7406}}, // '理' - {0xF9E5, {0x75E2}}, // '痢' - {0xF9E6, {0x7F79}}, // '罹' - {0xF9E7, {0x88CF}}, // '裏' - {0xF9E8, {0x88E1}}, // '裡' - {0xF9E9, {0x91CC}}, // '里' - {0xF9EA, {0x96E2}}, // '離' - {0xF9EB, {0x533F}}, // '匿' - {0xF9EC, {0x6EBA}}, // '溺' - {0xF9ED, {0x541D}}, // '吝' - {0xF9EE, {0x71D0}}, // '燐' - {0xF9EF, {0x7498}}, // '璘' - {0xF9F0, {0x85FA}}, // '藺' - {0xF9F1, {0x96A3}}, // '隣' - {0xF9F2, {0x9C57}}, // '鱗' - {0xF9F3, {0x9E9F}}, // '麟' - {0xF9F4, {0x6797}}, // '林' - {0xF9F5, {0x6DCB}}, // '淋' - {0xF9F6, {0x81E8}}, // '臨' - {0xF9F7, {0x7ACB}}, // '立' - {0xF9F8, {0x7B20}}, // '笠' - {0xF9F9, {0x7C92}}, // '粒' - {0xF9FA, {0x72C0}}, // '狀' - {0xF9FB, {0x7099}}, // '炙' - {0xF9FC, {0x8B58}}, // '識' - {0xF9FD, {0x4EC0}}, // '什' - {0xF9FE, {0x8336}}, // '茶' - {0xF9FF, {0x523A}}, // '刺' - {0xFA00, {0x5207}}, // '切' - {0xFA01, {0x5EA6}}, // '度' - {0xFA02, {0x62D3}}, // '拓' - {0xFA03, {0x7CD6}}, // '糖' - {0xFA04, {0x5B85}}, // '宅' - {0xFA05, {0x6D1E}}, // '洞' - {0xFA06, {0x66B4}}, // '暴' - {0xFA07, {0x8F3B}}, // '輻' - {0xFA08, {0x884C}}, // '行' - {0xFA09, {0x964D}}, // '降' - {0xFA0A, {0x898B}}, // '見' - {0xFA0B, {0x5ED3}}, // '廓' - {0xFA0C, {0x5140}}, // '兀' - {0xFA0D, {0x55C0}}, // '嗀' - {0xFA10, {0x585A}}, // '塚' - {0xFA12, {0x6674}}, // '晴' - {0xFA15, {0x51DE}}, // '凞' - {0xFA16, {0x732A}}, // '猪' - {0xFA17, {0x76CA}}, // '益' - {0xFA18, {0x793C}}, // '礼' - {0xFA19, {0x795E}}, // '神' - {0xFA1A, {0x7965}}, // '祥' - {0xFA1B, {0x798F}}, // '福' - {0xFA1C, {0x9756}}, // '靖' - {0xFA1D, {0x7CBE}}, // '精' - {0xFA1E, {0x7FBD}}, // '羽' - {0xFA20, {0x8612}}, // '蘒' - {0xFA22, {0x8AF8}}, // '諸' - {0xFA25, {0x9038}}, // '逸' - {0xFA26, {0x90FD}}, // '都' - {0xFA2A, {0x98EF}}, // '飯' - {0xFA2B, {0x98FC}}, // '飼' - {0xFA2C, {0x9928}}, // '館' - {0xFA2D, {0x9DB4}}, // '鶴' - {0xFA2E, {0x90DE}}, // '郞' - {0xFA2F, {0x96B7}}, // '隷' - {0xFA30, {0x4FAE}}, // '侮' - {0xFA31, {0x50E7}}, // '僧' - {0xFA32, {0x514D}}, // '免' - {0xFA33, {0x52C9}}, // '勉' - {0xFA34, {0x52E4}}, // '勤' - {0xFA35, {0x5351}}, // '卑' - {0xFA36, {0x559D}}, // '喝' - {0xFA37, {0x5606}}, // '嘆' - {0xFA38, {0x5668}}, // '器' - {0xFA39, {0x5840}}, // '塀' - {0xFA3A, {0x58A8}}, // '墨' - {0xFA3B, {0x5C64}}, // '層' - {0xFA3C, {0x5C6E}}, // '屮' - {0xFA3D, {0x6094}}, // '悔' - {0xFA3E, {0x6168}}, // '慨' - {0xFA3F, {0x618E}}, // '憎' - {0xFA40, {0x61F2}}, // '懲' - {0xFA41, {0x654F}}, // '敏' - {0xFA42, {0x65E2}}, // '既' - {0xFA43, {0x6691}}, // '暑' - {0xFA44, {0x6885}}, // '梅' - {0xFA45, {0x6D77}}, // '海' - {0xFA46, {0x6E1A}}, // '渚' - {0xFA47, {0x6F22}}, // '漢' - {0xFA48, {0x716E}}, // '煮' - {0xFA49, {0x722B}}, // '爫' - {0xFA4A, {0x7422}}, // '琢' - {0xFA4B, {0x7891}}, // '碑' - {0xFA4C, {0x793E}}, // '社' - {0xFA4D, {0x7949}}, // '祉' - {0xFA4E, {0x7948}}, // '祈' - {0xFA4F, {0x7950}}, // '祐' - {0xFA50, {0x7956}}, // '祖' - {0xFA51, {0x795D}}, // '祝' - {0xFA52, {0x798D}}, // '禍' - {0xFA53, {0x798E}}, // '禎' - {0xFA54, {0x7A40}}, // '穀' - {0xFA55, {0x7A81}}, // '突' - {0xFA56, {0x7BC0}}, // '節' - {0xFA57, {0x7DF4}}, // '練' - {0xFA58, {0x7E09}}, // '縉' - {0xFA59, {0x7E41}}, // '繁' - {0xFA5A, {0x7F72}}, // '署' - {0xFA5B, {0x8005}}, // '者' - {0xFA5C, {0x81ED}}, // '臭' - {0xFA5D, {0x8279}}, // '艹' - {0xFA5E, {0x8279}}, // '艹' - {0xFA5F, {0x8457}}, // '著' - {0xFA60, {0x8910}}, // '褐' - {0xFA61, {0x8996}}, // '視' - {0xFA62, {0x8B01}}, // '謁' - {0xFA63, {0x8B39}}, // '謹' - {0xFA64, {0x8CD3}}, // '賓' - {0xFA65, {0x8D08}}, // '贈' - {0xFA66, {0x8FB6}}, // '辶' - {0xFA67, {0x9038}}, // '逸' - {0xFA68, {0x96E3}}, // '難' - {0xFA69, {0x97FF}}, // '響' - {0xFA6A, {0x983B}}, // '頻' - {0xFA6B, {0x6075}}, // '恵' - {0xFA6C, {0x242EE}}, // '𤋮' - {0xFA6D, {0x8218}}, // '舘' - {0xFA70, {0x4E26}}, // '並' - {0xFA71, {0x51B5}}, // '况' - {0xFA72, {0x5168}}, // '全' - {0xFA73, {0x4F80}}, // '侀' - {0xFA74, {0x5145}}, // '充' - {0xFA75, {0x5180}}, // '冀' - {0xFA76, {0x52C7}}, // '勇' - {0xFA77, {0x52FA}}, // '勺' - {0xFA78, {0x559D}}, // '喝' - {0xFA79, {0x5555}}, // '啕' - {0xFA7A, {0x5599}}, // '喙' - {0xFA7B, {0x55E2}}, // '嗢' - {0xFA7C, {0x585A}}, // '塚' - {0xFA7D, {0x58B3}}, // '墳' - {0xFA7E, {0x5944}}, // '奄' - {0xFA7F, {0x5954}}, // '奔' - {0xFA80, {0x5A62}}, // '婢' - {0xFA81, {0x5B28}}, // '嬨' - {0xFA82, {0x5ED2}}, // '廒' - {0xFA83, {0x5ED9}}, // '廙' - {0xFA84, {0x5F69}}, // '彩' - {0xFA85, {0x5FAD}}, // '徭' - {0xFA86, {0x60D8}}, // '惘' - {0xFA87, {0x614E}}, // '慎' - {0xFA88, {0x6108}}, // '愈' - {0xFA89, {0x618E}}, // '憎' - {0xFA8A, {0x6160}}, // '慠' - {0xFA8B, {0x61F2}}, // '懲' - {0xFA8C, {0x6234}}, // '戴' - {0xFA8D, {0x63C4}}, // '揄' - {0xFA8E, {0x641C}}, // '搜' - {0xFA8F, {0x6452}}, // '摒' - {0xFA90, {0x6556}}, // '敖' - {0xFA91, {0x6674}}, // '晴' - {0xFA92, {0x6717}}, // '朗' - {0xFA93, {0x671B}}, // '望' - {0xFA94, {0x6756}}, // '杖' - {0xFA95, {0x6B79}}, // '歹' - {0xFA96, {0x6BBA}}, // '殺' - {0xFA97, {0x6D41}}, // '流' - {0xFA98, {0x6EDB}}, // '滛' - {0xFA99, {0x6ECB}}, // '滋' - {0xFA9A, {0x6F22}}, // '漢' - {0xFA9B, {0x701E}}, // '瀞' - {0xFA9C, {0x716E}}, // '煮' - {0xFA9D, {0x77A7}}, // '瞧' - {0xFA9E, {0x7235}}, // '爵' - {0xFA9F, {0x72AF}}, // '犯' - {0xFAA0, {0x732A}}, // '猪' - {0xFAA1, {0x7471}}, // '瑱' - {0xFAA2, {0x7506}}, // '甆' - {0xFAA3, {0x753B}}, // '画' - {0xFAA4, {0x761D}}, // '瘝' - {0xFAA5, {0x761F}}, // '瘟' - {0xFAA6, {0x76CA}}, // '益' - {0xFAA7, {0x76DB}}, // '盛' - {0xFAA8, {0x76F4}}, // '直' - {0xFAA9, {0x774A}}, // '睊' - {0xFAAA, {0x7740}}, // '着' - {0xFAAB, {0x78CC}}, // '磌' - {0xFAAC, {0x7AB1}}, // '窱' - {0xFAAD, {0x7BC0}}, // '節' - {0xFAAE, {0x7C7B}}, // '类' - {0xFAAF, {0x7D5B}}, // '絛' - {0xFAB0, {0x7DF4}}, // '練' - {0xFAB1, {0x7F3E}}, // '缾' - {0xFAB2, {0x8005}}, // '者' - {0xFAB3, {0x8352}}, // '荒' - {0xFAB4, {0x83EF}}, // '華' - {0xFAB5, {0x8779}}, // '蝹' - {0xFAB6, {0x8941}}, // '襁' - {0xFAB7, {0x8986}}, // '覆' - {0xFAB8, {0x8996}}, // '視' - {0xFAB9, {0x8ABF}}, // '調' - {0xFABA, {0x8AF8}}, // '諸' - {0xFABB, {0x8ACB}}, // '請' - {0xFABC, {0x8B01}}, // '謁' - {0xFABD, {0x8AFE}}, // '諾' - {0xFABE, {0x8AED}}, // '諭' - {0xFABF, {0x8B39}}, // '謹' - {0xFAC0, {0x8B8A}}, // '變' - {0xFAC1, {0x8D08}}, // '贈' - {0xFAC2, {0x8F38}}, // '輸' - {0xFAC3, {0x9072}}, // '遲' - {0xFAC4, {0x9199}}, // '醙' - {0xFAC5, {0x9276}}, // '鉶' - {0xFAC6, {0x967C}}, // '陼' - {0xFAC7, {0x96E3}}, // '難' - {0xFAC8, {0x9756}}, // '靖' - {0xFAC9, {0x97DB}}, // '韛' - {0xFACA, {0x97FF}}, // '響' - {0xFACB, {0x980B}}, // '頋' - {0xFACC, {0x983B}}, // '頻' - {0xFACD, {0x9B12}}, // '鬒' - {0xFACE, {0x9F9C}}, // '龜' - {0xFACF, {0x2284A}}, // '𢡊' - {0xFAD0, {0x22844}}, // '𢡄' - {0xFAD1, {0x233D5}}, // '𣏕' - {0xFAD2, {0x3B9D}}, // '㮝' - {0xFAD3, {0x4018}}, // '䀘' - {0xFAD4, {0x4039}}, // '䀹' - {0xFAD5, {0x25249}}, // '𥉉' - {0xFAD6, {0x25CD0}}, // '𥳐' - {0xFAD7, {0x27ED3}}, // '𧻓' - {0xFAD8, {0x9F43}}, // '齃' - {0xFAD9, {0x9F8E}}, // '龎' - {0xFB1D, {0x05D9, 0x05B4}}, // 'יִ' - {0xFB1F, {0x05F2, 0x05B7}}, // 'ײַ' - {0xFB2A, {0x05E9, 0x05C1}}, // 'שׁ' - {0xFB2B, {0x05E9, 0x05C2}}, // 'שׂ' - {0xFB2C, {0x05E9, 0x05BC, 0x05C1}}, // 'שּׁ' - {0xFB2D, {0x05E9, 0x05BC, 0x05C2}}, // 'שּׂ' - {0xFB2E, {0x05D0, 0x05B7}}, // 'אַ' - {0xFB2F, {0x05D0, 0x05B8}}, // 'אָ' - {0xFB30, {0x05D0, 0x05BC}}, // 'אּ' - {0xFB31, {0x05D1, 0x05BC}}, // 'בּ' - {0xFB32, {0x05D2, 0x05BC}}, // 'גּ' - {0xFB33, {0x05D3, 0x05BC}}, // 'דּ' - {0xFB34, {0x05D4, 0x05BC}}, // 'הּ' - {0xFB35, {0x05D5, 0x05BC}}, // 'וּ' - {0xFB36, {0x05D6, 0x05BC}}, // 'זּ' - {0xFB38, {0x05D8, 0x05BC}}, // 'טּ' - {0xFB39, {0x05D9, 0x05BC}}, // 'יּ' - {0xFB3A, {0x05DA, 0x05BC}}, // 'ךּ' - {0xFB3B, {0x05DB, 0x05BC}}, // 'כּ' - {0xFB3C, {0x05DC, 0x05BC}}, // 'לּ' - {0xFB3E, {0x05DE, 0x05BC}}, // 'מּ' - {0xFB40, {0x05E0, 0x05BC}}, // 'נּ' - {0xFB41, {0x05E1, 0x05BC}}, // 'סּ' - {0xFB43, {0x05E3, 0x05BC}}, // 'ףּ' - {0xFB44, {0x05E4, 0x05BC}}, // 'פּ' - {0xFB46, {0x05E6, 0x05BC}}, // 'צּ' - {0xFB47, {0x05E7, 0x05BC}}, // 'קּ' - {0xFB48, {0x05E8, 0x05BC}}, // 'רּ' - {0xFB49, {0x05E9, 0x05BC}}, // 'שּ' - {0xFB4A, {0x05EA, 0x05BC}}, // 'תּ' - {0xFB4B, {0x05D5, 0x05B9}}, // 'וֹ' - {0xFB4C, {0x05D1, 0x05BF}}, // 'בֿ' - {0xFB4D, {0x05DB, 0x05BF}}, // 'כֿ' - {0xFB4E, {0x05E4, 0x05BF}}, // 'פֿ' - {0x105C9, {0x105D2, 0x0307}}, - {0x105E4, {0x105DA, 0x0307}}, - {0x1109A, {0x11099, 0x110BA}}, // '𑂚' - {0x1109C, {0x1109B, 0x110BA}}, // '𑂜' - {0x110AB, {0x110A5, 0x110BA}}, // '𑂫' - {0x1112E, {0x11131, 0x11127}}, // '𑄮' - {0x1112F, {0x11132, 0x11127}}, // '𑄯' - {0x1134B, {0x11347, 0x1133E}}, // '𑍋' - {0x1134C, {0x11347, 0x11357}}, // '𑍌' - {0x11383, {0x11382, 0x113C9}}, - {0x11385, {0x11384, 0x113BB}}, - {0x1138E, {0x1138B, 0x113C2}}, - {0x11391, {0x11390, 0x113C9}}, - {0x113C5, {0x113C2, 0x113C2}}, - {0x113C7, {0x113C2, 0x113B8}}, - {0x113C8, {0x113C2, 0x113C9}}, - {0x114BB, {0x114B9, 0x114BA}}, // '𑒻' - {0x114BC, {0x114B9, 0x114B0}}, // '𑒼' - {0x114BE, {0x114B9, 0x114BD}}, // '𑒾' - {0x115BA, {0x115B8, 0x115AF}}, // '𑖺' - {0x115BB, {0x115B9, 0x115AF}}, // '𑖻' - {0x11938, {0x11935, 0x11930}}, // '𑤸' - {0x16121, {0x1611E, 0x1611E}}, - {0x16122, {0x1611E, 0x16129}}, - {0x16123, {0x1611E, 0x1611F}}, - {0x16124, {0x16129, 0x1611F}}, - {0x16125, {0x1611E, 0x16120}}, - {0x16126, {0x1611E, 0x1611E, 0x1611F}}, - {0x16127, {0x1611E, 0x16129, 0x1611F}}, - {0x16128, {0x1611E, 0x1611E, 0x16120}}, - {0x16D68, {0x16D67, 0x16D67}}, - {0x16D69, {0x16D63, 0x16D67}}, - {0x16D6A, {0x16D63, 0x16D67, 0x16D67}}, - {0x1D15E, {0x1D157, 0x1D165}}, // '𝅗𝅥' - {0x1D15F, {0x1D158, 0x1D165}}, // '𝅘𝅥' - {0x1D160, {0x1D158, 0x1D165, 0x1D16E}}, // '𝅘𝅥𝅮' - {0x1D161, {0x1D158, 0x1D165, 0x1D16F}}, // '𝅘𝅥𝅯' - {0x1D162, {0x1D158, 0x1D165, 0x1D170}}, // '𝅘𝅥𝅰' - {0x1D163, {0x1D158, 0x1D165, 0x1D171}}, // '𝅘𝅥𝅱' - {0x1D164, {0x1D158, 0x1D165, 0x1D172}}, // '𝅘𝅥𝅲' - {0x1D1BB, {0x1D1B9, 0x1D165}}, // '𝆹𝅥' - {0x1D1BC, {0x1D1BA, 0x1D165}}, // '𝆺𝅥' - {0x1D1BD, {0x1D1B9, 0x1D165, 0x1D16E}}, // '𝆹𝅥𝅮' - {0x1D1BE, {0x1D1BA, 0x1D165, 0x1D16E}}, // '𝆺𝅥𝅮' - {0x1D1BF, {0x1D1B9, 0x1D165, 0x1D16F}}, // '𝆹𝅥𝅯' - {0x1D1C0, {0x1D1BA, 0x1D165, 0x1D16F}}, // '𝆺𝅥𝅯' - {0x2F800, {0x4E3D}}, // '丽' - {0x2F801, {0x4E38}}, // '丸' - {0x2F802, {0x4E41}}, // '乁' - {0x2F803, {0x20122}}, // '𠄢' - {0x2F804, {0x4F60}}, // '你' - {0x2F805, {0x4FAE}}, // '侮' - {0x2F806, {0x4FBB}}, // '侻' - {0x2F807, {0x5002}}, // '倂' - {0x2F808, {0x507A}}, // '偺' - {0x2F809, {0x5099}}, // '備' - {0x2F80A, {0x50E7}}, // '僧' - {0x2F80B, {0x50CF}}, // '像' - {0x2F80C, {0x349E}}, // '㒞' - {0x2F80D, {0x2063A}}, // '𠘺' - {0x2F80E, {0x514D}}, // '免' - {0x2F80F, {0x5154}}, // '兔' - {0x2F810, {0x5164}}, // '兤' - {0x2F811, {0x5177}}, // '具' - {0x2F812, {0x2051C}}, // '𠔜' - {0x2F813, {0x34B9}}, // '㒹' - {0x2F814, {0x5167}}, // '內' - {0x2F815, {0x518D}}, // '再' - {0x2F816, {0x2054B}}, // '𠕋' - {0x2F817, {0x5197}}, // '冗' - {0x2F818, {0x51A4}}, // '冤' - {0x2F819, {0x4ECC}}, // '仌' - {0x2F81A, {0x51AC}}, // '冬' - {0x2F81B, {0x51B5}}, // '况' - {0x2F81C, {0x291DF}}, // '𩇟' - {0x2F81D, {0x51F5}}, // '凵' - {0x2F81E, {0x5203}}, // '刃' - {0x2F81F, {0x34DF}}, // '㓟' - {0x2F820, {0x523B}}, // '刻' - {0x2F821, {0x5246}}, // '剆' - {0x2F822, {0x5272}}, // '割' - {0x2F823, {0x5277}}, // '剷' - {0x2F824, {0x3515}}, // '㔕' - {0x2F825, {0x52C7}}, // '勇' - {0x2F826, {0x52C9}}, // '勉' - {0x2F827, {0x52E4}}, // '勤' - {0x2F828, {0x52FA}}, // '勺' - {0x2F829, {0x5305}}, // '包' - {0x2F82A, {0x5306}}, // '匆' - {0x2F82B, {0x5317}}, // '北' - {0x2F82C, {0x5349}}, // '卉' - {0x2F82D, {0x5351}}, // '卑' - {0x2F82E, {0x535A}}, // '博' - {0x2F82F, {0x5373}}, // '即' - {0x2F830, {0x537D}}, // '卽' - {0x2F831, {0x537F}}, // '卿' - {0x2F832, {0x537F}}, // '卿' - {0x2F833, {0x537F}}, // '卿' - {0x2F834, {0x20A2C}}, // '𠨬' - {0x2F835, {0x7070}}, // '灰' - {0x2F836, {0x53CA}}, // '及' - {0x2F837, {0x53DF}}, // '叟' - {0x2F838, {0x20B63}}, // '𠭣' - {0x2F839, {0x53EB}}, // '叫' - {0x2F83A, {0x53F1}}, // '叱' - {0x2F83B, {0x5406}}, // '吆' - {0x2F83C, {0x549E}}, // '咞' - {0x2F83D, {0x5438}}, // '吸' - {0x2F83E, {0x5448}}, // '呈' - {0x2F83F, {0x5468}}, // '周' - {0x2F840, {0x54A2}}, // '咢' - {0x2F841, {0x54F6}}, // '哶' - {0x2F842, {0x5510}}, // '唐' - {0x2F843, {0x5553}}, // '啓' - {0x2F844, {0x5563}}, // '啣' - {0x2F845, {0x5584}}, // '善' - {0x2F846, {0x5584}}, // '善' - {0x2F847, {0x5599}}, // '喙' - {0x2F848, {0x55AB}}, // '喫' - {0x2F849, {0x55B3}}, // '喳' - {0x2F84A, {0x55C2}}, // '嗂' - {0x2F84B, {0x5716}}, // '圖' - {0x2F84C, {0x5606}}, // '嘆' - {0x2F84D, {0x5717}}, // '圗' - {0x2F84E, {0x5651}}, // '噑' - {0x2F84F, {0x5674}}, // '噴' - {0x2F850, {0x5207}}, // '切' - {0x2F851, {0x58EE}}, // '壮' - {0x2F852, {0x57CE}}, // '城' - {0x2F853, {0x57F4}}, // '埴' - {0x2F854, {0x580D}}, // '堍' - {0x2F855, {0x578B}}, // '型' - {0x2F856, {0x5832}}, // '堲' - {0x2F857, {0x5831}}, // '報' - {0x2F858, {0x58AC}}, // '墬' - {0x2F859, {0x214E4}}, // '𡓤' - {0x2F85A, {0x58F2}}, // '売' - {0x2F85B, {0x58F7}}, // '壷' - {0x2F85C, {0x5906}}, // '夆' - {0x2F85D, {0x591A}}, // '多' - {0x2F85E, {0x5922}}, // '夢' - {0x2F85F, {0x5962}}, // '奢' - {0x2F860, {0x216A8}}, // '𡚨' - {0x2F861, {0x216EA}}, // '𡛪' - {0x2F862, {0x59EC}}, // '姬' - {0x2F863, {0x5A1B}}, // '娛' - {0x2F864, {0x5A27}}, // '娧' - {0x2F865, {0x59D8}}, // '姘' - {0x2F866, {0x5A66}}, // '婦' - {0x2F867, {0x36EE}}, // '㛮' - {0x2F868, {0x36FC}}, // '㛼' - {0x2F869, {0x5B08}}, // '嬈' - {0x2F86A, {0x5B3E}}, // '嬾' - {0x2F86B, {0x5B3E}}, // '嬾' - {0x2F86C, {0x219C8}}, // '𡧈' - {0x2F86D, {0x5BC3}}, // '寃' - {0x2F86E, {0x5BD8}}, // '寘' - {0x2F86F, {0x5BE7}}, // '寧' - {0x2F870, {0x5BF3}}, // '寳' - {0x2F871, {0x21B18}}, // '𡬘' - {0x2F872, {0x5BFF}}, // '寿' - {0x2F873, {0x5C06}}, // '将' - {0x2F874, {0x5F53}}, // '当' - {0x2F875, {0x5C22}}, // '尢' - {0x2F876, {0x3781}}, // '㞁' - {0x2F877, {0x5C60}}, // '屠' - {0x2F878, {0x5C6E}}, // '屮' - {0x2F879, {0x5CC0}}, // '峀' - {0x2F87A, {0x5C8D}}, // '岍' - {0x2F87B, {0x21DE4}}, // '𡷤' - {0x2F87C, {0x5D43}}, // '嵃' - {0x2F87D, {0x21DE6}}, // '𡷦' - {0x2F87E, {0x5D6E}}, // '嵮' - {0x2F87F, {0x5D6B}}, // '嵫' - {0x2F880, {0x5D7C}}, // '嵼' - {0x2F881, {0x5DE1}}, // '巡' - {0x2F882, {0x5DE2}}, // '巢' - {0x2F883, {0x382F}}, // '㠯' - {0x2F884, {0x5DFD}}, // '巽' - {0x2F885, {0x5E28}}, // '帨' - {0x2F886, {0x5E3D}}, // '帽' - {0x2F887, {0x5E69}}, // '幩' - {0x2F888, {0x3862}}, // '㡢' - {0x2F889, {0x22183}}, // '𢆃' - {0x2F88A, {0x387C}}, // '㡼' - {0x2F88B, {0x5EB0}}, // '庰' - {0x2F88C, {0x5EB3}}, // '庳' - {0x2F88D, {0x5EB6}}, // '庶' - {0x2F88E, {0x5ECA}}, // '廊' - {0x2F88F, {0x2A392}}, // '𪎒' - {0x2F890, {0x5EFE}}, // '廾' - {0x2F891, {0x22331}}, // '𢌱' - {0x2F892, {0x22331}}, // '𢌱' - {0x2F893, {0x8201}}, // '舁' - {0x2F894, {0x5F22}}, // '弢' - {0x2F895, {0x5F22}}, // '弢' - {0x2F896, {0x38C7}}, // '㣇' - {0x2F897, {0x232B8}}, // '𣊸' - {0x2F898, {0x261DA}}, // '𦇚' - {0x2F899, {0x5F62}}, // '形' - {0x2F89A, {0x5F6B}}, // '彫' - {0x2F89B, {0x38E3}}, // '㣣' - {0x2F89C, {0x5F9A}}, // '徚' - {0x2F89D, {0x5FCD}}, // '忍' - {0x2F89E, {0x5FD7}}, // '志' - {0x2F89F, {0x5FF9}}, // '忹' - {0x2F8A0, {0x6081}}, // '悁' - {0x2F8A1, {0x393A}}, // '㤺' - {0x2F8A2, {0x391C}}, // '㤜' - {0x2F8A3, {0x6094}}, // '悔' - {0x2F8A4, {0x226D4}}, // '𢛔' - {0x2F8A5, {0x60C7}}, // '惇' - {0x2F8A6, {0x6148}}, // '慈' - {0x2F8A7, {0x614C}}, // '慌' - {0x2F8A8, {0x614E}}, // '慎' - {0x2F8A9, {0x614C}}, // '慌' - {0x2F8AA, {0x617A}}, // '慺' - {0x2F8AB, {0x618E}}, // '憎' - {0x2F8AC, {0x61B2}}, // '憲' - {0x2F8AD, {0x61A4}}, // '憤' - {0x2F8AE, {0x61AF}}, // '憯' - {0x2F8AF, {0x61DE}}, // '懞' - {0x2F8B0, {0x61F2}}, // '懲' - {0x2F8B1, {0x61F6}}, // '懶' - {0x2F8B2, {0x6210}}, // '成' - {0x2F8B3, {0x621B}}, // '戛' - {0x2F8B4, {0x625D}}, // '扝' - {0x2F8B5, {0x62B1}}, // '抱' - {0x2F8B6, {0x62D4}}, // '拔' - {0x2F8B7, {0x6350}}, // '捐' - {0x2F8B8, {0x22B0C}}, // '𢬌' - {0x2F8B9, {0x633D}}, // '挽' - {0x2F8BA, {0x62FC}}, // '拼' - {0x2F8BB, {0x6368}}, // '捨' - {0x2F8BC, {0x6383}}, // '掃' - {0x2F8BD, {0x63E4}}, // '揤' - {0x2F8BE, {0x22BF1}}, // '𢯱' - {0x2F8BF, {0x6422}}, // '搢' - {0x2F8C0, {0x63C5}}, // '揅' - {0x2F8C1, {0x63A9}}, // '掩' - {0x2F8C2, {0x3A2E}}, // '㨮' - {0x2F8C3, {0x6469}}, // '摩' - {0x2F8C4, {0x647E}}, // '摾' - {0x2F8C5, {0x649D}}, // '撝' - {0x2F8C6, {0x6477}}, // '摷' - {0x2F8C7, {0x3A6C}}, // '㩬' - {0x2F8C8, {0x654F}}, // '敏' - {0x2F8C9, {0x656C}}, // '敬' - {0x2F8CA, {0x2300A}}, // '𣀊' - {0x2F8CB, {0x65E3}}, // '旣' - {0x2F8CC, {0x66F8}}, // '書' - {0x2F8CD, {0x6649}}, // '晉' - {0x2F8CE, {0x3B19}}, // '㬙' - {0x2F8CF, {0x6691}}, // '暑' - {0x2F8D0, {0x3B08}}, // '㬈' - {0x2F8D1, {0x3AE4}}, // '㫤' - {0x2F8D2, {0x5192}}, // '冒' - {0x2F8D3, {0x5195}}, // '冕' - {0x2F8D4, {0x6700}}, // '最' - {0x2F8D5, {0x669C}}, // '暜' - {0x2F8D6, {0x80AD}}, // '肭' - {0x2F8D7, {0x43D9}}, // '䏙' - {0x2F8D8, {0x6717}}, // '朗' - {0x2F8D9, {0x671B}}, // '望' - {0x2F8DA, {0x6721}}, // '朡' - {0x2F8DB, {0x675E}}, // '杞' - {0x2F8DC, {0x6753}}, // '杓' - {0x2F8DD, {0x233C3}}, // '𣏃' - {0x2F8DE, {0x3B49}}, // '㭉' - {0x2F8DF, {0x67FA}}, // '柺' - {0x2F8E0, {0x6785}}, // '枅' - {0x2F8E1, {0x6852}}, // '桒' - {0x2F8E2, {0x6885}}, // '梅' - {0x2F8E3, {0x2346D}}, // '𣑭' - {0x2F8E4, {0x688E}}, // '梎' - {0x2F8E5, {0x681F}}, // '栟' - {0x2F8E6, {0x6914}}, // '椔' - {0x2F8E7, {0x3B9D}}, // '㮝' - {0x2F8E8, {0x6942}}, // '楂' - {0x2F8E9, {0x69A3}}, // '榣' - {0x2F8EA, {0x69EA}}, // '槪' - {0x2F8EB, {0x6AA8}}, // '檨' - {0x2F8EC, {0x236A3}}, // '𣚣' - {0x2F8ED, {0x6ADB}}, // '櫛' - {0x2F8EE, {0x3C18}}, // '㰘' - {0x2F8EF, {0x6B21}}, // '次' - {0x2F8F0, {0x238A7}}, // '𣢧' - {0x2F8F1, {0x6B54}}, // '歔' - {0x2F8F2, {0x3C4E}}, // '㱎' - {0x2F8F3, {0x6B72}}, // '歲' - {0x2F8F4, {0x6B9F}}, // '殟' - {0x2F8F5, {0x6BBA}}, // '殺' - {0x2F8F6, {0x6BBB}}, // '殻' - {0x2F8F7, {0x23A8D}}, // '𣪍' - {0x2F8F8, {0x21D0B}}, // '𡴋' - {0x2F8F9, {0x23AFA}}, // '𣫺' - {0x2F8FA, {0x6C4E}}, // '汎' - {0x2F8FB, {0x23CBC}}, // '𣲼' - {0x2F8FC, {0x6CBF}}, // '沿' - {0x2F8FD, {0x6CCD}}, // '泍' - {0x2F8FE, {0x6C67}}, // '汧' - {0x2F8FF, {0x6D16}}, // '洖' - {0x2F900, {0x6D3E}}, // '派' - {0x2F901, {0x6D77}}, // '海' - {0x2F902, {0x6D41}}, // '流' - {0x2F903, {0x6D69}}, // '浩' - {0x2F904, {0x6D78}}, // '浸' - {0x2F905, {0x6D85}}, // '涅' - {0x2F906, {0x23D1E}}, // '𣴞' - {0x2F907, {0x6D34}}, // '洴' - {0x2F908, {0x6E2F}}, // '港' - {0x2F909, {0x6E6E}}, // '湮' - {0x2F90A, {0x3D33}}, // '㴳' - {0x2F90B, {0x6ECB}}, // '滋' - {0x2F90C, {0x6EC7}}, // '滇' - {0x2F90D, {0x23ED1}}, // '𣻑' - {0x2F90E, {0x6DF9}}, // '淹' - {0x2F90F, {0x6F6E}}, // '潮' - {0x2F910, {0x23F5E}}, // '𣽞' - {0x2F911, {0x23F8E}}, // '𣾎' - {0x2F912, {0x6FC6}}, // '濆' - {0x2F913, {0x7039}}, // '瀹' - {0x2F914, {0x701E}}, // '瀞' - {0x2F915, {0x701B}}, // '瀛' - {0x2F916, {0x3D96}}, // '㶖' - {0x2F917, {0x704A}}, // '灊' - {0x2F918, {0x707D}}, // '災' - {0x2F919, {0x7077}}, // '灷' - {0x2F91A, {0x70AD}}, // '炭' - {0x2F91B, {0x20525}}, // '𠔥' - {0x2F91C, {0x7145}}, // '煅' - {0x2F91D, {0x24263}}, // '𤉣' - {0x2F91E, {0x719C}}, // '熜' - {0x2F91F, {0x243AB}}, // '𤎫' - {0x2F920, {0x7228}}, // '爨' - {0x2F921, {0x7235}}, // '爵' - {0x2F922, {0x7250}}, // '牐' - {0x2F923, {0x24608}}, // '𤘈' - {0x2F924, {0x7280}}, // '犀' - {0x2F925, {0x7295}}, // '犕' - {0x2F926, {0x24735}}, // '𤜵' - {0x2F927, {0x24814}}, // '𤠔' - {0x2F928, {0x737A}}, // '獺' - {0x2F929, {0x738B}}, // '王' - {0x2F92A, {0x3EAC}}, // '㺬' - {0x2F92B, {0x73A5}}, // '玥' - {0x2F92C, {0x3EB8}}, // '㺸' - {0x2F92D, {0x3EB8}}, // '㺸' - {0x2F92E, {0x7447}}, // '瑇' - {0x2F92F, {0x745C}}, // '瑜' - {0x2F930, {0x7471}}, // '瑱' - {0x2F931, {0x7485}}, // '璅' - {0x2F932, {0x74CA}}, // '瓊' - {0x2F933, {0x3F1B}}, // '㼛' - {0x2F934, {0x7524}}, // '甤' - {0x2F935, {0x24C36}}, // '𤰶' - {0x2F936, {0x753E}}, // '甾' - {0x2F937, {0x24C92}}, // '𤲒' - {0x2F938, {0x7570}}, // '異' - {0x2F939, {0x2219F}}, // '𢆟' - {0x2F93A, {0x7610}}, // '瘐' - {0x2F93B, {0x24FA1}}, // '𤾡' - {0x2F93C, {0x24FB8}}, // '𤾸' - {0x2F93D, {0x25044}}, // '𥁄' - {0x2F93E, {0x3FFC}}, // '㿼' - {0x2F93F, {0x4008}}, // '䀈' - {0x2F940, {0x76F4}}, // '直' - {0x2F941, {0x250F3}}, // '𥃳' - {0x2F942, {0x250F2}}, // '𥃲' - {0x2F943, {0x25119}}, // '𥄙' - {0x2F944, {0x25133}}, // '𥄳' - {0x2F945, {0x771E}}, // '眞' - {0x2F946, {0x771F}}, // '真' - {0x2F947, {0x771F}}, // '真' - {0x2F948, {0x774A}}, // '睊' - {0x2F949, {0x4039}}, // '䀹' - {0x2F94A, {0x778B}}, // '瞋' - {0x2F94B, {0x4046}}, // '䁆' - {0x2F94C, {0x4096}}, // '䂖' - {0x2F94D, {0x2541D}}, // '𥐝' - {0x2F94E, {0x784E}}, // '硎' - {0x2F94F, {0x788C}}, // '碌' - {0x2F950, {0x78CC}}, // '磌' - {0x2F951, {0x40E3}}, // '䃣' - {0x2F952, {0x25626}}, // '𥘦' - {0x2F953, {0x7956}}, // '祖' - {0x2F954, {0x2569A}}, // '𥚚' - {0x2F955, {0x256C5}}, // '𥛅' - {0x2F956, {0x798F}}, // '福' - {0x2F957, {0x79EB}}, // '秫' - {0x2F958, {0x412F}}, // '䄯' - {0x2F959, {0x7A40}}, // '穀' - {0x2F95A, {0x7A4A}}, // '穊' - {0x2F95B, {0x7A4F}}, // '穏' - {0x2F95C, {0x2597C}}, // '𥥼' - {0x2F95D, {0x25AA7}}, // '𥪧' - {0x2F95E, {0x25AA7}}, // '𥪧' - {0x2F95F, {0x7AEE}}, // '竮' - {0x2F960, {0x4202}}, // '䈂' - {0x2F961, {0x25BAB}}, // '𥮫' - {0x2F962, {0x7BC6}}, // '篆' - {0x2F963, {0x7BC9}}, // '築' - {0x2F964, {0x4227}}, // '䈧' - {0x2F965, {0x25C80}}, // '𥲀' - {0x2F966, {0x7CD2}}, // '糒' - {0x2F967, {0x42A0}}, // '䊠' - {0x2F968, {0x7CE8}}, // '糨' - {0x2F969, {0x7CE3}}, // '糣' - {0x2F96A, {0x7D00}}, // '紀' - {0x2F96B, {0x25F86}}, // '𥾆' - {0x2F96C, {0x7D63}}, // '絣' - {0x2F96D, {0x4301}}, // '䌁' - {0x2F96E, {0x7DC7}}, // '緇' - {0x2F96F, {0x7E02}}, // '縂' - {0x2F970, {0x7E45}}, // '繅' - {0x2F971, {0x4334}}, // '䌴' - {0x2F972, {0x26228}}, // '𦈨' - {0x2F973, {0x26247}}, // '𦉇' - {0x2F974, {0x4359}}, // '䍙' - {0x2F975, {0x262D9}}, // '𦋙' - {0x2F976, {0x7F7A}}, // '罺' - {0x2F977, {0x2633E}}, // '𦌾' - {0x2F978, {0x7F95}}, // '羕' - {0x2F979, {0x7FFA}}, // '翺' - {0x2F97A, {0x8005}}, // '者' - {0x2F97B, {0x264DA}}, // '𦓚' - {0x2F97C, {0x26523}}, // '𦔣' - {0x2F97D, {0x8060}}, // '聠' - {0x2F97E, {0x265A8}}, // '𦖨' - {0x2F97F, {0x8070}}, // '聰' - {0x2F980, {0x2335F}}, // '𣍟' - {0x2F981, {0x43D5}}, // '䏕' - {0x2F982, {0x80B2}}, // '育' - {0x2F983, {0x8103}}, // '脃' - {0x2F984, {0x440B}}, // '䐋' - {0x2F985, {0x813E}}, // '脾' - {0x2F986, {0x5AB5}}, // '媵' - {0x2F987, {0x267A7}}, // '𦞧' - {0x2F988, {0x267B5}}, // '𦞵' - {0x2F989, {0x23393}}, // '𣎓' - {0x2F98A, {0x2339C}}, // '𣎜' - {0x2F98B, {0x8201}}, // '舁' - {0x2F98C, {0x8204}}, // '舄' - {0x2F98D, {0x8F9E}}, // '辞' - {0x2F98E, {0x446B}}, // '䑫' - {0x2F98F, {0x8291}}, // '芑' - {0x2F990, {0x828B}}, // '芋' - {0x2F991, {0x829D}}, // '芝' - {0x2F992, {0x52B3}}, // '劳' - {0x2F993, {0x82B1}}, // '花' - {0x2F994, {0x82B3}}, // '芳' - {0x2F995, {0x82BD}}, // '芽' - {0x2F996, {0x82E6}}, // '苦' - {0x2F997, {0x26B3C}}, // '𦬼' - {0x2F998, {0x82E5}}, // '若' - {0x2F999, {0x831D}}, // '茝' - {0x2F99A, {0x8363}}, // '荣' - {0x2F99B, {0x83AD}}, // '莭' - {0x2F99C, {0x8323}}, // '茣' - {0x2F99D, {0x83BD}}, // '莽' - {0x2F99E, {0x83E7}}, // '菧' - {0x2F99F, {0x8457}}, // '著' - {0x2F9A0, {0x8353}}, // '荓' - {0x2F9A1, {0x83CA}}, // '菊' - {0x2F9A2, {0x83CC}}, // '菌' - {0x2F9A3, {0x83DC}}, // '菜' - {0x2F9A4, {0x26C36}}, // '𦰶' - {0x2F9A5, {0x26D6B}}, // '𦵫' - {0x2F9A6, {0x26CD5}}, // '𦳕' - {0x2F9A7, {0x452B}}, // '䔫' - {0x2F9A8, {0x84F1}}, // '蓱' - {0x2F9A9, {0x84F3}}, // '蓳' - {0x2F9AA, {0x8516}}, // '蔖' - {0x2F9AB, {0x273CA}}, // '𧏊' - {0x2F9AC, {0x8564}}, // '蕤' - {0x2F9AD, {0x26F2C}}, // '𦼬' - {0x2F9AE, {0x455D}}, // '䕝' - {0x2F9AF, {0x4561}}, // '䕡' - {0x2F9B0, {0x26FB1}}, // '𦾱' - {0x2F9B1, {0x270D2}}, // '𧃒' - {0x2F9B2, {0x456B}}, // '䕫' - {0x2F9B3, {0x8650}}, // '虐' - {0x2F9B4, {0x865C}}, // '虜' - {0x2F9B5, {0x8667}}, // '虧' - {0x2F9B6, {0x8669}}, // '虩' - {0x2F9B7, {0x86A9}}, // '蚩' - {0x2F9B8, {0x8688}}, // '蚈' - {0x2F9B9, {0x870E}}, // '蜎' - {0x2F9BA, {0x86E2}}, // '蛢' - {0x2F9BB, {0x8779}}, // '蝹' - {0x2F9BC, {0x8728}}, // '蜨' - {0x2F9BD, {0x876B}}, // '蝫' - {0x2F9BE, {0x8786}}, // '螆' - {0x2F9BF, {0x45D7}}, // '䗗' - {0x2F9C0, {0x87E1}}, // '蟡' - {0x2F9C1, {0x8801}}, // '蠁' - {0x2F9C2, {0x45F9}}, // '䗹' - {0x2F9C3, {0x8860}}, // '衠' - {0x2F9C4, {0x8863}}, // '衣' - {0x2F9C5, {0x27667}}, // '𧙧' - {0x2F9C6, {0x88D7}}, // '裗' - {0x2F9C7, {0x88DE}}, // '裞' - {0x2F9C8, {0x4635}}, // '䘵' - {0x2F9C9, {0x88FA}}, // '裺' - {0x2F9CA, {0x34BB}}, // '㒻' - {0x2F9CB, {0x278AE}}, // '𧢮' - {0x2F9CC, {0x27966}}, // '𧥦' - {0x2F9CD, {0x46BE}}, // '䚾' - {0x2F9CE, {0x46C7}}, // '䛇' - {0x2F9CF, {0x8AA0}}, // '誠' - {0x2F9D0, {0x8AED}}, // '諭' - {0x2F9D1, {0x8B8A}}, // '變' - {0x2F9D2, {0x8C55}}, // '豕' - {0x2F9D3, {0x27CA8}}, // '𧲨' - {0x2F9D4, {0x8CAB}}, // '貫' - {0x2F9D5, {0x8CC1}}, // '賁' - {0x2F9D6, {0x8D1B}}, // '贛' - {0x2F9D7, {0x8D77}}, // '起' - {0x2F9D8, {0x27F2F}}, // '𧼯' - {0x2F9D9, {0x20804}}, // '𠠄' - {0x2F9DA, {0x8DCB}}, // '跋' - {0x2F9DB, {0x8DBC}}, // '趼' - {0x2F9DC, {0x8DF0}}, // '跰' - {0x2F9DD, {0x208DE}}, // '𠣞' - {0x2F9DE, {0x8ED4}}, // '軔' - {0x2F9DF, {0x8F38}}, // '輸' - {0x2F9E0, {0x285D2}}, // '𨗒' - {0x2F9E1, {0x285ED}}, // '𨗭' - {0x2F9E2, {0x9094}}, // '邔' - {0x2F9E3, {0x90F1}}, // '郱' - {0x2F9E4, {0x9111}}, // '鄑' - {0x2F9E5, {0x2872E}}, // '𨜮' - {0x2F9E6, {0x911B}}, // '鄛' - {0x2F9E7, {0x9238}}, // '鈸' - {0x2F9E8, {0x92D7}}, // '鋗' - {0x2F9E9, {0x92D8}}, // '鋘' - {0x2F9EA, {0x927C}}, // '鉼' - {0x2F9EB, {0x93F9}}, // '鏹' - {0x2F9EC, {0x9415}}, // '鐕' - {0x2F9ED, {0x28BFA}}, // '𨯺' - {0x2F9EE, {0x958B}}, // '開' - {0x2F9EF, {0x4995}}, // '䦕' - {0x2F9F0, {0x95B7}}, // '閷' - {0x2F9F1, {0x28D77}}, // '𨵷' - {0x2F9F2, {0x49E6}}, // '䧦' - {0x2F9F3, {0x96C3}}, // '雃' - {0x2F9F4, {0x5DB2}}, // '嶲' - {0x2F9F5, {0x9723}}, // '霣' - {0x2F9F6, {0x29145}}, // '𩅅' - {0x2F9F7, {0x2921A}}, // '𩈚' - {0x2F9F8, {0x4A6E}}, // '䩮' - {0x2F9F9, {0x4A76}}, // '䩶' - {0x2F9FA, {0x97E0}}, // '韠' - {0x2F9FB, {0x2940A}}, // '𩐊' - {0x2F9FC, {0x4AB2}}, // '䪲' - {0x2F9FD, {0x29496}}, // '𩒖' - {0x2F9FE, {0x980B}}, // '頋' - {0x2F9FF, {0x980B}}, // '頋' - {0x2FA00, {0x9829}}, // '頩' - {0x2FA01, {0x295B6}}, // '𩖶' - {0x2FA02, {0x98E2}}, // '飢' - {0x2FA03, {0x4B33}}, // '䬳' - {0x2FA04, {0x9929}}, // '餩' - {0x2FA05, {0x99A7}}, // '馧' - {0x2FA06, {0x99C2}}, // '駂' - {0x2FA07, {0x99FE}}, // '駾' - {0x2FA08, {0x4BCE}}, // '䯎' - {0x2FA09, {0x29B30}}, // '𩬰' - {0x2FA0A, {0x9B12}}, // '鬒' - {0x2FA0B, {0x9C40}}, // '鱀' - {0x2FA0C, {0x9CFD}}, // '鳽' - {0x2FA0D, {0x4CCE}}, // '䳎' - {0x2FA0E, {0x4CED}}, // '䳭' - {0x2FA0F, {0x9D67}}, // '鵧' - {0x2FA10, {0x2A0CE}}, // '𪃎' - {0x2FA11, {0x4CF8}}, // '䳸' - {0x2FA12, {0x2A105}}, // '𪄅' - {0x2FA13, {0x2A20E}}, // '𪈎' - {0x2FA14, {0x2A291}}, // '𪊑' - {0x2FA15, {0x9EBB}}, // '麻' - {0x2FA16, {0x4D56}}, // '䵖' - {0x2FA17, {0x9EF9}}, // '黹' - {0x2FA18, {0x9EFE}}, // '黾' - {0x2FA19, {0x9F05}}, // '鼅' - {0x2FA1A, {0x9F0F}}, // '鼏' - {0x2FA1B, {0x9F16}}, // '鼖' - {0x2FA1C, {0x9F3B}}, // '鼻' - {0x2FA1D, {0x2A600}}, // '𪘀' +static const std::unordered_map> nfd_decomposition_table = { + {0x00C0, {0x0041, 0x0300}}, // 'À' + {0x00C1, {0x0041, 0x0301}}, // 'Á' + {0x00C2, {0x0041, 0x0302}}, // 'Â' + {0x00C3, {0x0041, 0x0303}}, // 'Ã' + {0x00C4, {0x0041, 0x0308}}, // 'Ä' + {0x00C5, {0x0041, 0x030A}}, // 'Å' + {0x00C7, {0x0043, 0x0327}}, // 'Ç' + {0x00C8, {0x0045, 0x0300}}, // 'È' + {0x00C9, {0x0045, 0x0301}}, // 'É' + {0x00CA, {0x0045, 0x0302}}, // 'Ê' + {0x00CB, {0x0045, 0x0308}}, // 'Ë' + {0x00CC, {0x0049, 0x0300}}, // 'Ì' + {0x00CD, {0x0049, 0x0301}}, // 'Í' + {0x00CE, {0x0049, 0x0302}}, // 'Î' + {0x00CF, {0x0049, 0x0308}}, // 'Ï' + {0x00D1, {0x004E, 0x0303}}, // 'Ñ' + {0x00D2, {0x004F, 0x0300}}, // 'Ò' + {0x00D3, {0x004F, 0x0301}}, // 'Ó' + {0x00D4, {0x004F, 0x0302}}, // 'Ô' + {0x00D5, {0x004F, 0x0303}}, // 'Õ' + {0x00D6, {0x004F, 0x0308}}, // 'Ö' + {0x00D9, {0x0055, 0x0300}}, // 'Ù' + {0x00DA, {0x0055, 0x0301}}, // 'Ú' + {0x00DB, {0x0055, 0x0302}}, // 'Û' + {0x00DC, {0x0055, 0x0308}}, // 'Ü' + {0x00DD, {0x0059, 0x0301}}, // 'Ý' + {0x00E0, {0x0061, 0x0300}}, // 'à' + {0x00E1, {0x0061, 0x0301}}, // 'á' + {0x00E2, {0x0061, 0x0302}}, // 'â' + {0x00E3, {0x0061, 0x0303}}, // 'ã' + {0x00E4, {0x0061, 0x0308}}, // 'ä' + {0x00E5, {0x0061, 0x030A}}, // 'å' + {0x00E7, {0x0063, 0x0327}}, // 'ç' + {0x00E8, {0x0065, 0x0300}}, // 'è' + {0x00E9, {0x0065, 0x0301}}, // 'é' + {0x00EA, {0x0065, 0x0302}}, // 'ê' + {0x00EB, {0x0065, 0x0308}}, // 'ë' + {0x00EC, {0x0069, 0x0300}}, // 'ì' + {0x00ED, {0x0069, 0x0301}}, // 'í' + {0x00EE, {0x0069, 0x0302}}, // 'î' + {0x00EF, {0x0069, 0x0308}}, // 'ï' + {0x00F1, {0x006E, 0x0303}}, // 'ñ' + {0x00F2, {0x006F, 0x0300}}, // 'ò' + {0x00F3, {0x006F, 0x0301}}, // 'ó' + {0x00F4, {0x006F, 0x0302}}, // 'ô' + {0x00F5, {0x006F, 0x0303}}, // 'õ' + {0x00F6, {0x006F, 0x0308}}, // 'ö' + {0x00F9, {0x0075, 0x0300}}, // 'ù' + {0x00FA, {0x0075, 0x0301}}, // 'ú' + {0x00FB, {0x0075, 0x0302}}, // 'û' + {0x00FC, {0x0075, 0x0308}}, // 'ü' + {0x00FD, {0x0079, 0x0301}}, // 'ý' + {0x00FF, {0x0079, 0x0308}}, // 'ÿ' + {0x0100, {0x0041, 0x0304}}, // 'Ā' + {0x0101, {0x0061, 0x0304}}, // 'ā' + {0x0102, {0x0041, 0x0306}}, // 'Ă' + {0x0103, {0x0061, 0x0306}}, // 'ă' + {0x0104, {0x0041, 0x0328}}, // 'Ą' + {0x0105, {0x0061, 0x0328}}, // 'ą' + {0x0106, {0x0043, 0x0301}}, // 'Ć' + {0x0107, {0x0063, 0x0301}}, // 'ć' + {0x0108, {0x0043, 0x0302}}, // 'Ĉ' + {0x0109, {0x0063, 0x0302}}, // 'ĉ' + {0x010A, {0x0043, 0x0307}}, // 'Ċ' + {0x010B, {0x0063, 0x0307}}, // 'ċ' + {0x010C, {0x0043, 0x030C}}, // 'Č' + {0x010D, {0x0063, 0x030C}}, // 'č' + {0x010E, {0x0044, 0x030C}}, // 'Ď' + {0x010F, {0x0064, 0x030C}}, // 'ď' + {0x0112, {0x0045, 0x0304}}, // 'Ē' + {0x0113, {0x0065, 0x0304}}, // 'ē' + {0x0114, {0x0045, 0x0306}}, // 'Ĕ' + {0x0115, {0x0065, 0x0306}}, // 'ĕ' + {0x0116, {0x0045, 0x0307}}, // 'Ė' + {0x0117, {0x0065, 0x0307}}, // 'ė' + {0x0118, {0x0045, 0x0328}}, // 'Ę' + {0x0119, {0x0065, 0x0328}}, // 'ę' + {0x011A, {0x0045, 0x030C}}, // 'Ě' + {0x011B, {0x0065, 0x030C}}, // 'ě' + {0x011C, {0x0047, 0x0302}}, // 'Ĝ' + {0x011D, {0x0067, 0x0302}}, // 'ĝ' + {0x011E, {0x0047, 0x0306}}, // 'Ğ' + {0x011F, {0x0067, 0x0306}}, // 'ğ' + {0x0120, {0x0047, 0x0307}}, // 'Ġ' + {0x0121, {0x0067, 0x0307}}, // 'ġ' + {0x0122, {0x0047, 0x0327}}, // 'Ģ' + {0x0123, {0x0067, 0x0327}}, // 'ģ' + {0x0124, {0x0048, 0x0302}}, // 'Ĥ' + {0x0125, {0x0068, 0x0302}}, // 'ĥ' + {0x0128, {0x0049, 0x0303}}, // 'Ĩ' + {0x0129, {0x0069, 0x0303}}, // 'ĩ' + {0x012A, {0x0049, 0x0304}}, // 'Ī' + {0x012B, {0x0069, 0x0304}}, // 'ī' + {0x012C, {0x0049, 0x0306}}, // 'Ĭ' + {0x012D, {0x0069, 0x0306}}, // 'ĭ' + {0x012E, {0x0049, 0x0328}}, // 'Į' + {0x012F, {0x0069, 0x0328}}, // 'į' + {0x0130, {0x0049, 0x0307}}, // 'İ' + {0x0134, {0x004A, 0x0302}}, // 'Ĵ' + {0x0135, {0x006A, 0x0302}}, // 'ĵ' + {0x0136, {0x004B, 0x0327}}, // 'Ķ' + {0x0137, {0x006B, 0x0327}}, // 'ķ' + {0x0139, {0x004C, 0x0301}}, // 'Ĺ' + {0x013A, {0x006C, 0x0301}}, // 'ĺ' + {0x013B, {0x004C, 0x0327}}, // 'Ļ' + {0x013C, {0x006C, 0x0327}}, // 'ļ' + {0x013D, {0x004C, 0x030C}}, // 'Ľ' + {0x013E, {0x006C, 0x030C}}, // 'ľ' + {0x0143, {0x004E, 0x0301}}, // 'Ń' + {0x0144, {0x006E, 0x0301}}, // 'ń' + {0x0145, {0x004E, 0x0327}}, // 'Ņ' + {0x0146, {0x006E, 0x0327}}, // 'ņ' + {0x0147, {0x004E, 0x030C}}, // 'Ň' + {0x0148, {0x006E, 0x030C}}, // 'ň' + {0x014C, {0x004F, 0x0304}}, // 'Ō' + {0x014D, {0x006F, 0x0304}}, // 'ō' + {0x014E, {0x004F, 0x0306}}, // 'Ŏ' + {0x014F, {0x006F, 0x0306}}, // 'ŏ' + {0x0150, {0x004F, 0x030B}}, // 'Ő' + {0x0151, {0x006F, 0x030B}}, // 'ő' + {0x0154, {0x0052, 0x0301}}, // 'Ŕ' + {0x0155, {0x0072, 0x0301}}, // 'ŕ' + {0x0156, {0x0052, 0x0327}}, // 'Ŗ' + {0x0157, {0x0072, 0x0327}}, // 'ŗ' + {0x0158, {0x0052, 0x030C}}, // 'Ř' + {0x0159, {0x0072, 0x030C}}, // 'ř' + {0x015A, {0x0053, 0x0301}}, // 'Ś' + {0x015B, {0x0073, 0x0301}}, // 'ś' + {0x015C, {0x0053, 0x0302}}, // 'Ŝ' + {0x015D, {0x0073, 0x0302}}, // 'ŝ' + {0x015E, {0x0053, 0x0327}}, // 'Ş' + {0x015F, {0x0073, 0x0327}}, // 'ş' + {0x0160, {0x0053, 0x030C}}, // 'Š' + {0x0161, {0x0073, 0x030C}}, // 'š' + {0x0162, {0x0054, 0x0327}}, // 'Ţ' + {0x0163, {0x0074, 0x0327}}, // 'ţ' + {0x0164, {0x0054, 0x030C}}, // 'Ť' + {0x0165, {0x0074, 0x030C}}, // 'ť' + {0x0168, {0x0055, 0x0303}}, // 'Ũ' + {0x0169, {0x0075, 0x0303}}, // 'ũ' + {0x016A, {0x0055, 0x0304}}, // 'Ū' + {0x016B, {0x0075, 0x0304}}, // 'ū' + {0x016C, {0x0055, 0x0306}}, // 'Ŭ' + {0x016D, {0x0075, 0x0306}}, // 'ŭ' + {0x016E, {0x0055, 0x030A}}, // 'Ů' + {0x016F, {0x0075, 0x030A}}, // 'ů' + {0x0170, {0x0055, 0x030B}}, // 'Ű' + {0x0171, {0x0075, 0x030B}}, // 'ű' + {0x0172, {0x0055, 0x0328}}, // 'Ų' + {0x0173, {0x0075, 0x0328}}, // 'ų' + {0x0174, {0x0057, 0x0302}}, // 'Ŵ' + {0x0175, {0x0077, 0x0302}}, // 'ŵ' + {0x0176, {0x0059, 0x0302}}, // 'Ŷ' + {0x0177, {0x0079, 0x0302}}, // 'ŷ' + {0x0178, {0x0059, 0x0308}}, // 'Ÿ' + {0x0179, {0x005A, 0x0301}}, // 'Ź' + {0x017A, {0x007A, 0x0301}}, // 'ź' + {0x017B, {0x005A, 0x0307}}, // 'Ż' + {0x017C, {0x007A, 0x0307}}, // 'ż' + {0x017D, {0x005A, 0x030C}}, // 'Ž' + {0x017E, {0x007A, 0x030C}}, // 'ž' + {0x01A0, {0x004F, 0x031B}}, // 'Ơ' + {0x01A1, {0x006F, 0x031B}}, // 'ơ' + {0x01AF, {0x0055, 0x031B}}, // 'Ư' + {0x01B0, {0x0075, 0x031B}}, // 'ư' + {0x01CD, {0x0041, 0x030C}}, // 'Ǎ' + {0x01CE, {0x0061, 0x030C}}, // 'ǎ' + {0x01CF, {0x0049, 0x030C}}, // 'Ǐ' + {0x01D0, {0x0069, 0x030C}}, // 'ǐ' + {0x01D1, {0x004F, 0x030C}}, // 'Ǒ' + {0x01D2, {0x006F, 0x030C}}, // 'ǒ' + {0x01D3, {0x0055, 0x030C}}, // 'Ǔ' + {0x01D4, {0x0075, 0x030C}}, // 'ǔ' + {0x01D5, {0x0055, 0x0308, 0x0304}}, // 'Ǖ' + {0x01D6, {0x0075, 0x0308, 0x0304}}, // 'ǖ' + {0x01D7, {0x0055, 0x0308, 0x0301}}, // 'Ǘ' + {0x01D8, {0x0075, 0x0308, 0x0301}}, // 'ǘ' + {0x01D9, {0x0055, 0x0308, 0x030C}}, // 'Ǚ' + {0x01DA, {0x0075, 0x0308, 0x030C}}, // 'ǚ' + {0x01DB, {0x0055, 0x0308, 0x0300}}, // 'Ǜ' + {0x01DC, {0x0075, 0x0308, 0x0300}}, // 'ǜ' + {0x01DE, {0x0041, 0x0308, 0x0304}}, // 'Ǟ' + {0x01DF, {0x0061, 0x0308, 0x0304}}, // 'ǟ' + {0x01E0, {0x0041, 0x0307, 0x0304}}, // 'Ǡ' + {0x01E1, {0x0061, 0x0307, 0x0304}}, // 'ǡ' + {0x01E2, {0x00C6, 0x0304}}, // 'Ǣ' + {0x01E3, {0x00E6, 0x0304}}, // 'ǣ' + {0x01E6, {0x0047, 0x030C}}, // 'Ǧ' + {0x01E7, {0x0067, 0x030C}}, // 'ǧ' + {0x01E8, {0x004B, 0x030C}}, // 'Ǩ' + {0x01E9, {0x006B, 0x030C}}, // 'ǩ' + {0x01EA, {0x004F, 0x0328}}, // 'Ǫ' + {0x01EB, {0x006F, 0x0328}}, // 'ǫ' + {0x01EC, {0x004F, 0x0328, 0x0304}}, // 'Ǭ' + {0x01ED, {0x006F, 0x0328, 0x0304}}, // 'ǭ' + {0x01EE, {0x01B7, 0x030C}}, // 'Ǯ' + {0x01EF, {0x0292, 0x030C}}, // 'ǯ' + {0x01F0, {0x006A, 0x030C}}, // 'ǰ' + {0x01F4, {0x0047, 0x0301}}, // 'Ǵ' + {0x01F5, {0x0067, 0x0301}}, // 'ǵ' + {0x01F8, {0x004E, 0x0300}}, // 'Ǹ' + {0x01F9, {0x006E, 0x0300}}, // 'ǹ' + {0x01FA, {0x0041, 0x030A, 0x0301}}, // 'Ǻ' + {0x01FB, {0x0061, 0x030A, 0x0301}}, // 'ǻ' + {0x01FC, {0x00C6, 0x0301}}, // 'Ǽ' + {0x01FD, {0x00E6, 0x0301}}, // 'ǽ' + {0x01FE, {0x00D8, 0x0301}}, // 'Ǿ' + {0x01FF, {0x00F8, 0x0301}}, // 'ǿ' + {0x0200, {0x0041, 0x030F}}, // 'Ȁ' + {0x0201, {0x0061, 0x030F}}, // 'ȁ' + {0x0202, {0x0041, 0x0311}}, // 'Ȃ' + {0x0203, {0x0061, 0x0311}}, // 'ȃ' + {0x0204, {0x0045, 0x030F}}, // 'Ȅ' + {0x0205, {0x0065, 0x030F}}, // 'ȅ' + {0x0206, {0x0045, 0x0311}}, // 'Ȇ' + {0x0207, {0x0065, 0x0311}}, // 'ȇ' + {0x0208, {0x0049, 0x030F}}, // 'Ȉ' + {0x0209, {0x0069, 0x030F}}, // 'ȉ' + {0x020A, {0x0049, 0x0311}}, // 'Ȋ' + {0x020B, {0x0069, 0x0311}}, // 'ȋ' + {0x020C, {0x004F, 0x030F}}, // 'Ȍ' + {0x020D, {0x006F, 0x030F}}, // 'ȍ' + {0x020E, {0x004F, 0x0311}}, // 'Ȏ' + {0x020F, {0x006F, 0x0311}}, // 'ȏ' + {0x0210, {0x0052, 0x030F}}, // 'Ȑ' + {0x0211, {0x0072, 0x030F}}, // 'ȑ' + {0x0212, {0x0052, 0x0311}}, // 'Ȓ' + {0x0213, {0x0072, 0x0311}}, // 'ȓ' + {0x0214, {0x0055, 0x030F}}, // 'Ȕ' + {0x0215, {0x0075, 0x030F}}, // 'ȕ' + {0x0216, {0x0055, 0x0311}}, // 'Ȗ' + {0x0217, {0x0075, 0x0311}}, // 'ȗ' + {0x0218, {0x0053, 0x0326}}, // 'Ș' + {0x0219, {0x0073, 0x0326}}, // 'ș' + {0x021A, {0x0054, 0x0326}}, // 'Ț' + {0x021B, {0x0074, 0x0326}}, // 'ț' + {0x021E, {0x0048, 0x030C}}, // 'Ȟ' + {0x021F, {0x0068, 0x030C}}, // 'ȟ' + {0x0226, {0x0041, 0x0307}}, // 'Ȧ' + {0x0227, {0x0061, 0x0307}}, // 'ȧ' + {0x0228, {0x0045, 0x0327}}, // 'Ȩ' + {0x0229, {0x0065, 0x0327}}, // 'ȩ' + {0x022A, {0x004F, 0x0308, 0x0304}}, // 'Ȫ' + {0x022B, {0x006F, 0x0308, 0x0304}}, // 'ȫ' + {0x022C, {0x004F, 0x0303, 0x0304}}, // 'Ȭ' + {0x022D, {0x006F, 0x0303, 0x0304}}, // 'ȭ' + {0x022E, {0x004F, 0x0307}}, // 'Ȯ' + {0x022F, {0x006F, 0x0307}}, // 'ȯ' + {0x0230, {0x004F, 0x0307, 0x0304}}, // 'Ȱ' + {0x0231, {0x006F, 0x0307, 0x0304}}, // 'ȱ' + {0x0232, {0x0059, 0x0304}}, // 'Ȳ' + {0x0233, {0x0079, 0x0304}}, // 'ȳ' + {0x0340, {0x0300}}, // '̀' + {0x0341, {0x0301}}, // '́' + {0x0343, {0x0313}}, // '̓' + {0x0344, {0x0308, 0x0301}}, // '̈́' + {0x0374, {0x02B9}}, // 'ʹ' + {0x037E, {0x003B}}, // ';' + {0x0385, {0x00A8, 0x0301}}, // '΅' + {0x0386, {0x0391, 0x0301}}, // 'Ά' + {0x0387, {0x00B7}}, // '·' + {0x0388, {0x0395, 0x0301}}, // 'Έ' + {0x0389, {0x0397, 0x0301}}, // 'Ή' + {0x038A, {0x0399, 0x0301}}, // 'Ί' + {0x038C, {0x039F, 0x0301}}, // 'Ό' + {0x038E, {0x03A5, 0x0301}}, // 'Ύ' + {0x038F, {0x03A9, 0x0301}}, // 'Ώ' + {0x0390, {0x03B9, 0x0308, 0x0301}}, // 'ΐ' + {0x03AA, {0x0399, 0x0308}}, // 'Ϊ' + {0x03AB, {0x03A5, 0x0308}}, // 'Ϋ' + {0x03AC, {0x03B1, 0x0301}}, // 'ά' + {0x03AD, {0x03B5, 0x0301}}, // 'έ' + {0x03AE, {0x03B7, 0x0301}}, // 'ή' + {0x03AF, {0x03B9, 0x0301}}, // 'ί' + {0x03B0, {0x03C5, 0x0308, 0x0301}}, // 'ΰ' + {0x03CA, {0x03B9, 0x0308}}, // 'ϊ' + {0x03CB, {0x03C5, 0x0308}}, // 'ϋ' + {0x03CC, {0x03BF, 0x0301}}, // 'ό' + {0x03CD, {0x03C5, 0x0301}}, // 'ύ' + {0x03CE, {0x03C9, 0x0301}}, // 'ώ' + {0x03D3, {0x03D2, 0x0301}}, // 'ϓ' + {0x03D4, {0x03D2, 0x0308}}, // 'ϔ' + {0x0400, {0x0415, 0x0300}}, // 'Ѐ' + {0x0401, {0x0415, 0x0308}}, // 'Ё' + {0x0403, {0x0413, 0x0301}}, // 'Ѓ' + {0x0407, {0x0406, 0x0308}}, // 'Ї' + {0x040C, {0x041A, 0x0301}}, // 'Ќ' + {0x040D, {0x0418, 0x0300}}, // 'Ѝ' + {0x040E, {0x0423, 0x0306}}, // 'Ў' + {0x0419, {0x0418, 0x0306}}, // 'Й' + {0x0439, {0x0438, 0x0306}}, // 'й' + {0x0450, {0x0435, 0x0300}}, // 'ѐ' + {0x0451, {0x0435, 0x0308}}, // 'ё' + {0x0453, {0x0433, 0x0301}}, // 'ѓ' + {0x0457, {0x0456, 0x0308}}, // 'ї' + {0x045C, {0x043A, 0x0301}}, // 'ќ' + {0x045D, {0x0438, 0x0300}}, // 'ѝ' + {0x045E, {0x0443, 0x0306}}, // 'ў' + {0x0476, {0x0474, 0x030F}}, // 'Ѷ' + {0x0477, {0x0475, 0x030F}}, // 'ѷ' + {0x04C1, {0x0416, 0x0306}}, // 'Ӂ' + {0x04C2, {0x0436, 0x0306}}, // 'ӂ' + {0x04D0, {0x0410, 0x0306}}, // 'Ӑ' + {0x04D1, {0x0430, 0x0306}}, // 'ӑ' + {0x04D2, {0x0410, 0x0308}}, // 'Ӓ' + {0x04D3, {0x0430, 0x0308}}, // 'ӓ' + {0x04D6, {0x0415, 0x0306}}, // 'Ӗ' + {0x04D7, {0x0435, 0x0306}}, // 'ӗ' + {0x04DA, {0x04D8, 0x0308}}, // 'Ӛ' + {0x04DB, {0x04D9, 0x0308}}, // 'ӛ' + {0x04DC, {0x0416, 0x0308}}, // 'Ӝ' + {0x04DD, {0x0436, 0x0308}}, // 'ӝ' + {0x04DE, {0x0417, 0x0308}}, // 'Ӟ' + {0x04DF, {0x0437, 0x0308}}, // 'ӟ' + {0x04E2, {0x0418, 0x0304}}, // 'Ӣ' + {0x04E3, {0x0438, 0x0304}}, // 'ӣ' + {0x04E4, {0x0418, 0x0308}}, // 'Ӥ' + {0x04E5, {0x0438, 0x0308}}, // 'ӥ' + {0x04E6, {0x041E, 0x0308}}, // 'Ӧ' + {0x04E7, {0x043E, 0x0308}}, // 'ӧ' + {0x04EA, {0x04E8, 0x0308}}, // 'Ӫ' + {0x04EB, {0x04E9, 0x0308}}, // 'ӫ' + {0x04EC, {0x042D, 0x0308}}, // 'Ӭ' + {0x04ED, {0x044D, 0x0308}}, // 'ӭ' + {0x04EE, {0x0423, 0x0304}}, // 'Ӯ' + {0x04EF, {0x0443, 0x0304}}, // 'ӯ' + {0x04F0, {0x0423, 0x0308}}, // 'Ӱ' + {0x04F1, {0x0443, 0x0308}}, // 'ӱ' + {0x04F2, {0x0423, 0x030B}}, // 'Ӳ' + {0x04F3, {0x0443, 0x030B}}, // 'ӳ' + {0x04F4, {0x0427, 0x0308}}, // 'Ӵ' + {0x04F5, {0x0447, 0x0308}}, // 'ӵ' + {0x04F8, {0x042B, 0x0308}}, // 'Ӹ' + {0x04F9, {0x044B, 0x0308}}, // 'ӹ' + {0x0622, {0x0627, 0x0653}}, // 'آ' + {0x0623, {0x0627, 0x0654}}, // 'أ' + {0x0624, {0x0648, 0x0654}}, // 'ؤ' + {0x0625, {0x0627, 0x0655}}, // 'إ' + {0x0626, {0x064A, 0x0654}}, // 'ئ' + {0x06C0, {0x06D5, 0x0654}}, // 'ۀ' + {0x06C2, {0x06C1, 0x0654}}, // 'ۂ' + {0x06D3, {0x06D2, 0x0654}}, // 'ۓ' + {0x0929, {0x0928, 0x093C}}, // 'ऩ' + {0x0931, {0x0930, 0x093C}}, // 'ऱ' + {0x0934, {0x0933, 0x093C}}, // 'ऴ' + {0x0958, {0x0915, 0x093C}}, // 'क़' + {0x0959, {0x0916, 0x093C}}, // 'ख़' + {0x095A, {0x0917, 0x093C}}, // 'ग़' + {0x095B, {0x091C, 0x093C}}, // 'ज़' + {0x095C, {0x0921, 0x093C}}, // 'ड़' + {0x095D, {0x0922, 0x093C}}, // 'ढ़' + {0x095E, {0x092B, 0x093C}}, // 'फ़' + {0x095F, {0x092F, 0x093C}}, // 'य़' + {0x09CB, {0x09C7, 0x09BE}}, // 'ো' + {0x09CC, {0x09C7, 0x09D7}}, // 'ৌ' + {0x09DC, {0x09A1, 0x09BC}}, // 'ড়' + {0x09DD, {0x09A2, 0x09BC}}, // 'ঢ়' + {0x09DF, {0x09AF, 0x09BC}}, // 'য়' + {0x0A33, {0x0A32, 0x0A3C}}, // 'ਲ਼' + {0x0A36, {0x0A38, 0x0A3C}}, // 'ਸ਼' + {0x0A59, {0x0A16, 0x0A3C}}, // 'ਖ਼' + {0x0A5A, {0x0A17, 0x0A3C}}, // 'ਗ਼' + {0x0A5B, {0x0A1C, 0x0A3C}}, // 'ਜ਼' + {0x0A5E, {0x0A2B, 0x0A3C}}, // 'ਫ਼' + {0x0B48, {0x0B47, 0x0B56}}, // 'ୈ' + {0x0B4B, {0x0B47, 0x0B3E}}, // 'ୋ' + {0x0B4C, {0x0B47, 0x0B57}}, // 'ୌ' + {0x0B5C, {0x0B21, 0x0B3C}}, // 'ଡ଼' + {0x0B5D, {0x0B22, 0x0B3C}}, // 'ଢ଼' + {0x0B94, {0x0B92, 0x0BD7}}, // 'ஔ' + {0x0BCA, {0x0BC6, 0x0BBE}}, // 'ொ' + {0x0BCB, {0x0BC7, 0x0BBE}}, // 'ோ' + {0x0BCC, {0x0BC6, 0x0BD7}}, // 'ௌ' + {0x0C48, {0x0C46, 0x0C56}}, // 'ై' + {0x0CC0, {0x0CBF, 0x0CD5}}, // 'ೀ' + {0x0CC7, {0x0CC6, 0x0CD5}}, // 'ೇ' + {0x0CC8, {0x0CC6, 0x0CD6}}, // 'ೈ' + {0x0CCA, {0x0CC6, 0x0CC2}}, // 'ೊ' + {0x0CCB, {0x0CC6, 0x0CC2, 0x0CD5}}, // 'ೋ' + {0x0D4A, {0x0D46, 0x0D3E}}, // 'ൊ' + {0x0D4B, {0x0D47, 0x0D3E}}, // 'ോ' + {0x0D4C, {0x0D46, 0x0D57}}, // 'ൌ' + {0x0DDA, {0x0DD9, 0x0DCA}}, // 'ේ' + {0x0DDC, {0x0DD9, 0x0DCF}}, // 'ො' + {0x0DDD, {0x0DD9, 0x0DCF, 0x0DCA}}, // 'ෝ' + {0x0DDE, {0x0DD9, 0x0DDF}}, // 'ෞ' + {0x0F43, {0x0F42, 0x0FB7}}, // 'གྷ' + {0x0F4D, {0x0F4C, 0x0FB7}}, // 'ཌྷ' + {0x0F52, {0x0F51, 0x0FB7}}, // 'དྷ' + {0x0F57, {0x0F56, 0x0FB7}}, // 'བྷ' + {0x0F5C, {0x0F5B, 0x0FB7}}, // 'ཛྷ' + {0x0F69, {0x0F40, 0x0FB5}}, // 'ཀྵ' + {0x0F73, {0x0F71, 0x0F72}}, // 'ཱི' + {0x0F75, {0x0F71, 0x0F74}}, // 'ཱུ' + {0x0F76, {0x0FB2, 0x0F80}}, // 'ྲྀ' + {0x0F78, {0x0FB3, 0x0F80}}, // 'ླྀ' + {0x0F81, {0x0F71, 0x0F80}}, // 'ཱྀ' + {0x0F93, {0x0F92, 0x0FB7}}, // 'ྒྷ' + {0x0F9D, {0x0F9C, 0x0FB7}}, // 'ྜྷ' + {0x0FA2, {0x0FA1, 0x0FB7}}, // 'ྡྷ' + {0x0FA7, {0x0FA6, 0x0FB7}}, // 'ྦྷ' + {0x0FAC, {0x0FAB, 0x0FB7}}, // 'ྫྷ' + {0x0FB9, {0x0F90, 0x0FB5}}, // 'ྐྵ' + {0x1026, {0x1025, 0x102E}}, // 'ဦ' + {0x1B06, {0x1B05, 0x1B35}}, // 'ᬆ' + {0x1B08, {0x1B07, 0x1B35}}, // 'ᬈ' + {0x1B0A, {0x1B09, 0x1B35}}, // 'ᬊ' + {0x1B0C, {0x1B0B, 0x1B35}}, // 'ᬌ' + {0x1B0E, {0x1B0D, 0x1B35}}, // 'ᬎ' + {0x1B12, {0x1B11, 0x1B35}}, // 'ᬒ' + {0x1B3B, {0x1B3A, 0x1B35}}, // 'ᬻ' + {0x1B3D, {0x1B3C, 0x1B35}}, // 'ᬽ' + {0x1B40, {0x1B3E, 0x1B35}}, // 'ᭀ' + {0x1B41, {0x1B3F, 0x1B35}}, // 'ᭁ' + {0x1B43, {0x1B42, 0x1B35}}, // 'ᭃ' + {0x1E00, {0x0041, 0x0325}}, // 'Ḁ' + {0x1E01, {0x0061, 0x0325}}, // 'ḁ' + {0x1E02, {0x0042, 0x0307}}, // 'Ḃ' + {0x1E03, {0x0062, 0x0307}}, // 'ḃ' + {0x1E04, {0x0042, 0x0323}}, // 'Ḅ' + {0x1E05, {0x0062, 0x0323}}, // 'ḅ' + {0x1E06, {0x0042, 0x0331}}, // 'Ḇ' + {0x1E07, {0x0062, 0x0331}}, // 'ḇ' + {0x1E08, {0x0043, 0x0327, 0x0301}}, // 'Ḉ' + {0x1E09, {0x0063, 0x0327, 0x0301}}, // 'ḉ' + {0x1E0A, {0x0044, 0x0307}}, // 'Ḋ' + {0x1E0B, {0x0064, 0x0307}}, // 'ḋ' + {0x1E0C, {0x0044, 0x0323}}, // 'Ḍ' + {0x1E0D, {0x0064, 0x0323}}, // 'ḍ' + {0x1E0E, {0x0044, 0x0331}}, // 'Ḏ' + {0x1E0F, {0x0064, 0x0331}}, // 'ḏ' + {0x1E10, {0x0044, 0x0327}}, // 'Ḑ' + {0x1E11, {0x0064, 0x0327}}, // 'ḑ' + {0x1E12, {0x0044, 0x032D}}, // 'Ḓ' + {0x1E13, {0x0064, 0x032D}}, // 'ḓ' + {0x1E14, {0x0045, 0x0304, 0x0300}}, // 'Ḕ' + {0x1E15, {0x0065, 0x0304, 0x0300}}, // 'ḕ' + {0x1E16, {0x0045, 0x0304, 0x0301}}, // 'Ḗ' + {0x1E17, {0x0065, 0x0304, 0x0301}}, // 'ḗ' + {0x1E18, {0x0045, 0x032D}}, // 'Ḙ' + {0x1E19, {0x0065, 0x032D}}, // 'ḙ' + {0x1E1A, {0x0045, 0x0330}}, // 'Ḛ' + {0x1E1B, {0x0065, 0x0330}}, // 'ḛ' + {0x1E1C, {0x0045, 0x0327, 0x0306}}, // 'Ḝ' + {0x1E1D, {0x0065, 0x0327, 0x0306}}, // 'ḝ' + {0x1E1E, {0x0046, 0x0307}}, // 'Ḟ' + {0x1E1F, {0x0066, 0x0307}}, // 'ḟ' + {0x1E20, {0x0047, 0x0304}}, // 'Ḡ' + {0x1E21, {0x0067, 0x0304}}, // 'ḡ' + {0x1E22, {0x0048, 0x0307}}, // 'Ḣ' + {0x1E23, {0x0068, 0x0307}}, // 'ḣ' + {0x1E24, {0x0048, 0x0323}}, // 'Ḥ' + {0x1E25, {0x0068, 0x0323}}, // 'ḥ' + {0x1E26, {0x0048, 0x0308}}, // 'Ḧ' + {0x1E27, {0x0068, 0x0308}}, // 'ḧ' + {0x1E28, {0x0048, 0x0327}}, // 'Ḩ' + {0x1E29, {0x0068, 0x0327}}, // 'ḩ' + {0x1E2A, {0x0048, 0x032E}}, // 'Ḫ' + {0x1E2B, {0x0068, 0x032E}}, // 'ḫ' + {0x1E2C, {0x0049, 0x0330}}, // 'Ḭ' + {0x1E2D, {0x0069, 0x0330}}, // 'ḭ' + {0x1E2E, {0x0049, 0x0308, 0x0301}}, // 'Ḯ' + {0x1E2F, {0x0069, 0x0308, 0x0301}}, // 'ḯ' + {0x1E30, {0x004B, 0x0301}}, // 'Ḱ' + {0x1E31, {0x006B, 0x0301}}, // 'ḱ' + {0x1E32, {0x004B, 0x0323}}, // 'Ḳ' + {0x1E33, {0x006B, 0x0323}}, // 'ḳ' + {0x1E34, {0x004B, 0x0331}}, // 'Ḵ' + {0x1E35, {0x006B, 0x0331}}, // 'ḵ' + {0x1E36, {0x004C, 0x0323}}, // 'Ḷ' + {0x1E37, {0x006C, 0x0323}}, // 'ḷ' + {0x1E38, {0x004C, 0x0323, 0x0304}}, // 'Ḹ' + {0x1E39, {0x006C, 0x0323, 0x0304}}, // 'ḹ' + {0x1E3A, {0x004C, 0x0331}}, // 'Ḻ' + {0x1E3B, {0x006C, 0x0331}}, // 'ḻ' + {0x1E3C, {0x004C, 0x032D}}, // 'Ḽ' + {0x1E3D, {0x006C, 0x032D}}, // 'ḽ' + {0x1E3E, {0x004D, 0x0301}}, // 'Ḿ' + {0x1E3F, {0x006D, 0x0301}}, // 'ḿ' + {0x1E40, {0x004D, 0x0307}}, // 'Ṁ' + {0x1E41, {0x006D, 0x0307}}, // 'ṁ' + {0x1E42, {0x004D, 0x0323}}, // 'Ṃ' + {0x1E43, {0x006D, 0x0323}}, // 'ṃ' + {0x1E44, {0x004E, 0x0307}}, // 'Ṅ' + {0x1E45, {0x006E, 0x0307}}, // 'ṅ' + {0x1E46, {0x004E, 0x0323}}, // 'Ṇ' + {0x1E47, {0x006E, 0x0323}}, // 'ṇ' + {0x1E48, {0x004E, 0x0331}}, // 'Ṉ' + {0x1E49, {0x006E, 0x0331}}, // 'ṉ' + {0x1E4A, {0x004E, 0x032D}}, // 'Ṋ' + {0x1E4B, {0x006E, 0x032D}}, // 'ṋ' + {0x1E4C, {0x004F, 0x0303, 0x0301}}, // 'Ṍ' + {0x1E4D, {0x006F, 0x0303, 0x0301}}, // 'ṍ' + {0x1E4E, {0x004F, 0x0303, 0x0308}}, // 'Ṏ' + {0x1E4F, {0x006F, 0x0303, 0x0308}}, // 'ṏ' + {0x1E50, {0x004F, 0x0304, 0x0300}}, // 'Ṑ' + {0x1E51, {0x006F, 0x0304, 0x0300}}, // 'ṑ' + {0x1E52, {0x004F, 0x0304, 0x0301}}, // 'Ṓ' + {0x1E53, {0x006F, 0x0304, 0x0301}}, // 'ṓ' + {0x1E54, {0x0050, 0x0301}}, // 'Ṕ' + {0x1E55, {0x0070, 0x0301}}, // 'ṕ' + {0x1E56, {0x0050, 0x0307}}, // 'Ṗ' + {0x1E57, {0x0070, 0x0307}}, // 'ṗ' + {0x1E58, {0x0052, 0x0307}}, // 'Ṙ' + {0x1E59, {0x0072, 0x0307}}, // 'ṙ' + {0x1E5A, {0x0052, 0x0323}}, // 'Ṛ' + {0x1E5B, {0x0072, 0x0323}}, // 'ṛ' + {0x1E5C, {0x0052, 0x0323, 0x0304}}, // 'Ṝ' + {0x1E5D, {0x0072, 0x0323, 0x0304}}, // 'ṝ' + {0x1E5E, {0x0052, 0x0331}}, // 'Ṟ' + {0x1E5F, {0x0072, 0x0331}}, // 'ṟ' + {0x1E60, {0x0053, 0x0307}}, // 'Ṡ' + {0x1E61, {0x0073, 0x0307}}, // 'ṡ' + {0x1E62, {0x0053, 0x0323}}, // 'Ṣ' + {0x1E63, {0x0073, 0x0323}}, // 'ṣ' + {0x1E64, {0x0053, 0x0301, 0x0307}}, // 'Ṥ' + {0x1E65, {0x0073, 0x0301, 0x0307}}, // 'ṥ' + {0x1E66, {0x0053, 0x030C, 0x0307}}, // 'Ṧ' + {0x1E67, {0x0073, 0x030C, 0x0307}}, // 'ṧ' + {0x1E68, {0x0053, 0x0323, 0x0307}}, // 'Ṩ' + {0x1E69, {0x0073, 0x0323, 0x0307}}, // 'ṩ' + {0x1E6A, {0x0054, 0x0307}}, // 'Ṫ' + {0x1E6B, {0x0074, 0x0307}}, // 'ṫ' + {0x1E6C, {0x0054, 0x0323}}, // 'Ṭ' + {0x1E6D, {0x0074, 0x0323}}, // 'ṭ' + {0x1E6E, {0x0054, 0x0331}}, // 'Ṯ' + {0x1E6F, {0x0074, 0x0331}}, // 'ṯ' + {0x1E70, {0x0054, 0x032D}}, // 'Ṱ' + {0x1E71, {0x0074, 0x032D}}, // 'ṱ' + {0x1E72, {0x0055, 0x0324}}, // 'Ṳ' + {0x1E73, {0x0075, 0x0324}}, // 'ṳ' + {0x1E74, {0x0055, 0x0330}}, // 'Ṵ' + {0x1E75, {0x0075, 0x0330}}, // 'ṵ' + {0x1E76, {0x0055, 0x032D}}, // 'Ṷ' + {0x1E77, {0x0075, 0x032D}}, // 'ṷ' + {0x1E78, {0x0055, 0x0303, 0x0301}}, // 'Ṹ' + {0x1E79, {0x0075, 0x0303, 0x0301}}, // 'ṹ' + {0x1E7A, {0x0055, 0x0304, 0x0308}}, // 'Ṻ' + {0x1E7B, {0x0075, 0x0304, 0x0308}}, // 'ṻ' + {0x1E7C, {0x0056, 0x0303}}, // 'Ṽ' + {0x1E7D, {0x0076, 0x0303}}, // 'ṽ' + {0x1E7E, {0x0056, 0x0323}}, // 'Ṿ' + {0x1E7F, {0x0076, 0x0323}}, // 'ṿ' + {0x1E80, {0x0057, 0x0300}}, // 'Ẁ' + {0x1E81, {0x0077, 0x0300}}, // 'ẁ' + {0x1E82, {0x0057, 0x0301}}, // 'Ẃ' + {0x1E83, {0x0077, 0x0301}}, // 'ẃ' + {0x1E84, {0x0057, 0x0308}}, // 'Ẅ' + {0x1E85, {0x0077, 0x0308}}, // 'ẅ' + {0x1E86, {0x0057, 0x0307}}, // 'Ẇ' + {0x1E87, {0x0077, 0x0307}}, // 'ẇ' + {0x1E88, {0x0057, 0x0323}}, // 'Ẉ' + {0x1E89, {0x0077, 0x0323}}, // 'ẉ' + {0x1E8A, {0x0058, 0x0307}}, // 'Ẋ' + {0x1E8B, {0x0078, 0x0307}}, // 'ẋ' + {0x1E8C, {0x0058, 0x0308}}, // 'Ẍ' + {0x1E8D, {0x0078, 0x0308}}, // 'ẍ' + {0x1E8E, {0x0059, 0x0307}}, // 'Ẏ' + {0x1E8F, {0x0079, 0x0307}}, // 'ẏ' + {0x1E90, {0x005A, 0x0302}}, // 'Ẑ' + {0x1E91, {0x007A, 0x0302}}, // 'ẑ' + {0x1E92, {0x005A, 0x0323}}, // 'Ẓ' + {0x1E93, {0x007A, 0x0323}}, // 'ẓ' + {0x1E94, {0x005A, 0x0331}}, // 'Ẕ' + {0x1E95, {0x007A, 0x0331}}, // 'ẕ' + {0x1E96, {0x0068, 0x0331}}, // 'ẖ' + {0x1E97, {0x0074, 0x0308}}, // 'ẗ' + {0x1E98, {0x0077, 0x030A}}, // 'ẘ' + {0x1E99, {0x0079, 0x030A}}, // 'ẙ' + {0x1E9B, {0x017F, 0x0307}}, // 'ẛ' + {0x1EA0, {0x0041, 0x0323}}, // 'Ạ' + {0x1EA1, {0x0061, 0x0323}}, // 'ạ' + {0x1EA2, {0x0041, 0x0309}}, // 'Ả' + {0x1EA3, {0x0061, 0x0309}}, // 'ả' + {0x1EA4, {0x0041, 0x0302, 0x0301}}, // 'Ấ' + {0x1EA5, {0x0061, 0x0302, 0x0301}}, // 'ấ' + {0x1EA6, {0x0041, 0x0302, 0x0300}}, // 'Ầ' + {0x1EA7, {0x0061, 0x0302, 0x0300}}, // 'ầ' + {0x1EA8, {0x0041, 0x0302, 0x0309}}, // 'Ẩ' + {0x1EA9, {0x0061, 0x0302, 0x0309}}, // 'ẩ' + {0x1EAA, {0x0041, 0x0302, 0x0303}}, // 'Ẫ' + {0x1EAB, {0x0061, 0x0302, 0x0303}}, // 'ẫ' + {0x1EAC, {0x0041, 0x0323, 0x0302}}, // 'Ậ' + {0x1EAD, {0x0061, 0x0323, 0x0302}}, // 'ậ' + {0x1EAE, {0x0041, 0x0306, 0x0301}}, // 'Ắ' + {0x1EAF, {0x0061, 0x0306, 0x0301}}, // 'ắ' + {0x1EB0, {0x0041, 0x0306, 0x0300}}, // 'Ằ' + {0x1EB1, {0x0061, 0x0306, 0x0300}}, // 'ằ' + {0x1EB2, {0x0041, 0x0306, 0x0309}}, // 'Ẳ' + {0x1EB3, {0x0061, 0x0306, 0x0309}}, // 'ẳ' + {0x1EB4, {0x0041, 0x0306, 0x0303}}, // 'Ẵ' + {0x1EB5, {0x0061, 0x0306, 0x0303}}, // 'ẵ' + {0x1EB6, {0x0041, 0x0323, 0x0306}}, // 'Ặ' + {0x1EB7, {0x0061, 0x0323, 0x0306}}, // 'ặ' + {0x1EB8, {0x0045, 0x0323}}, // 'Ẹ' + {0x1EB9, {0x0065, 0x0323}}, // 'ẹ' + {0x1EBA, {0x0045, 0x0309}}, // 'Ẻ' + {0x1EBB, {0x0065, 0x0309}}, // 'ẻ' + {0x1EBC, {0x0045, 0x0303}}, // 'Ẽ' + {0x1EBD, {0x0065, 0x0303}}, // 'ẽ' + {0x1EBE, {0x0045, 0x0302, 0x0301}}, // 'Ế' + {0x1EBF, {0x0065, 0x0302, 0x0301}}, // 'ế' + {0x1EC0, {0x0045, 0x0302, 0x0300}}, // 'Ề' + {0x1EC1, {0x0065, 0x0302, 0x0300}}, // 'ề' + {0x1EC2, {0x0045, 0x0302, 0x0309}}, // 'Ể' + {0x1EC3, {0x0065, 0x0302, 0x0309}}, // 'ể' + {0x1EC4, {0x0045, 0x0302, 0x0303}}, // 'Ễ' + {0x1EC5, {0x0065, 0x0302, 0x0303}}, // 'ễ' + {0x1EC6, {0x0045, 0x0323, 0x0302}}, // 'Ệ' + {0x1EC7, {0x0065, 0x0323, 0x0302}}, // 'ệ' + {0x1EC8, {0x0049, 0x0309}}, // 'Ỉ' + {0x1EC9, {0x0069, 0x0309}}, // 'ỉ' + {0x1ECA, {0x0049, 0x0323}}, // 'Ị' + {0x1ECB, {0x0069, 0x0323}}, // 'ị' + {0x1ECC, {0x004F, 0x0323}}, // 'Ọ' + {0x1ECD, {0x006F, 0x0323}}, // 'ọ' + {0x1ECE, {0x004F, 0x0309}}, // 'Ỏ' + {0x1ECF, {0x006F, 0x0309}}, // 'ỏ' + {0x1ED0, {0x004F, 0x0302, 0x0301}}, // 'Ố' + {0x1ED1, {0x006F, 0x0302, 0x0301}}, // 'ố' + {0x1ED2, {0x004F, 0x0302, 0x0300}}, // 'Ồ' + {0x1ED3, {0x006F, 0x0302, 0x0300}}, // 'ồ' + {0x1ED4, {0x004F, 0x0302, 0x0309}}, // 'Ổ' + {0x1ED5, {0x006F, 0x0302, 0x0309}}, // 'ổ' + {0x1ED6, {0x004F, 0x0302, 0x0303}}, // 'Ỗ' + {0x1ED7, {0x006F, 0x0302, 0x0303}}, // 'ỗ' + {0x1ED8, {0x004F, 0x0323, 0x0302}}, // 'Ộ' + {0x1ED9, {0x006F, 0x0323, 0x0302}}, // 'ộ' + {0x1EDA, {0x004F, 0x031B, 0x0301}}, // 'Ớ' + {0x1EDB, {0x006F, 0x031B, 0x0301}}, // 'ớ' + {0x1EDC, {0x004F, 0x031B, 0x0300}}, // 'Ờ' + {0x1EDD, {0x006F, 0x031B, 0x0300}}, // 'ờ' + {0x1EDE, {0x004F, 0x031B, 0x0309}}, // 'Ở' + {0x1EDF, {0x006F, 0x031B, 0x0309}}, // 'ở' + {0x1EE0, {0x004F, 0x031B, 0x0303}}, // 'Ỡ' + {0x1EE1, {0x006F, 0x031B, 0x0303}}, // 'ỡ' + {0x1EE2, {0x004F, 0x031B, 0x0323}}, // 'Ợ' + {0x1EE3, {0x006F, 0x031B, 0x0323}}, // 'ợ' + {0x1EE4, {0x0055, 0x0323}}, // 'Ụ' + {0x1EE5, {0x0075, 0x0323}}, // 'ụ' + {0x1EE6, {0x0055, 0x0309}}, // 'Ủ' + {0x1EE7, {0x0075, 0x0309}}, // 'ủ' + {0x1EE8, {0x0055, 0x031B, 0x0301}}, // 'Ứ' + {0x1EE9, {0x0075, 0x031B, 0x0301}}, // 'ứ' + {0x1EEA, {0x0055, 0x031B, 0x0300}}, // 'Ừ' + {0x1EEB, {0x0075, 0x031B, 0x0300}}, // 'ừ' + {0x1EEC, {0x0055, 0x031B, 0x0309}}, // 'Ử' + {0x1EED, {0x0075, 0x031B, 0x0309}}, // 'ử' + {0x1EEE, {0x0055, 0x031B, 0x0303}}, // 'Ữ' + {0x1EEF, {0x0075, 0x031B, 0x0303}}, // 'ữ' + {0x1EF0, {0x0055, 0x031B, 0x0323}}, // 'Ự' + {0x1EF1, {0x0075, 0x031B, 0x0323}}, // 'ự' + {0x1EF2, {0x0059, 0x0300}}, // 'Ỳ' + {0x1EF3, {0x0079, 0x0300}}, // 'ỳ' + {0x1EF4, {0x0059, 0x0323}}, // 'Ỵ' + {0x1EF5, {0x0079, 0x0323}}, // 'ỵ' + {0x1EF6, {0x0059, 0x0309}}, // 'Ỷ' + {0x1EF7, {0x0079, 0x0309}}, // 'ỷ' + {0x1EF8, {0x0059, 0x0303}}, // 'Ỹ' + {0x1EF9, {0x0079, 0x0303}}, // 'ỹ' + {0x1F00, {0x03B1, 0x0313}}, // 'ἀ' + {0x1F01, {0x03B1, 0x0314}}, // 'ἁ' + {0x1F02, {0x03B1, 0x0313, 0x0300}}, // 'ἂ' + {0x1F03, {0x03B1, 0x0314, 0x0300}}, // 'ἃ' + {0x1F04, {0x03B1, 0x0313, 0x0301}}, // 'ἄ' + {0x1F05, {0x03B1, 0x0314, 0x0301}}, // 'ἅ' + {0x1F06, {0x03B1, 0x0313, 0x0342}}, // 'ἆ' + {0x1F07, {0x03B1, 0x0314, 0x0342}}, // 'ἇ' + {0x1F08, {0x0391, 0x0313}}, // 'Ἀ' + {0x1F09, {0x0391, 0x0314}}, // 'Ἁ' + {0x1F0A, {0x0391, 0x0313, 0x0300}}, // 'Ἂ' + {0x1F0B, {0x0391, 0x0314, 0x0300}}, // 'Ἃ' + {0x1F0C, {0x0391, 0x0313, 0x0301}}, // 'Ἄ' + {0x1F0D, {0x0391, 0x0314, 0x0301}}, // 'Ἅ' + {0x1F0E, {0x0391, 0x0313, 0x0342}}, // 'Ἆ' + {0x1F0F, {0x0391, 0x0314, 0x0342}}, // 'Ἇ' + {0x1F10, {0x03B5, 0x0313}}, // 'ἐ' + {0x1F11, {0x03B5, 0x0314}}, // 'ἑ' + {0x1F12, {0x03B5, 0x0313, 0x0300}}, // 'ἒ' + {0x1F13, {0x03B5, 0x0314, 0x0300}}, // 'ἓ' + {0x1F14, {0x03B5, 0x0313, 0x0301}}, // 'ἔ' + {0x1F15, {0x03B5, 0x0314, 0x0301}}, // 'ἕ' + {0x1F18, {0x0395, 0x0313}}, // 'Ἐ' + {0x1F19, {0x0395, 0x0314}}, // 'Ἑ' + {0x1F1A, {0x0395, 0x0313, 0x0300}}, // 'Ἒ' + {0x1F1B, {0x0395, 0x0314, 0x0300}}, // 'Ἓ' + {0x1F1C, {0x0395, 0x0313, 0x0301}}, // 'Ἔ' + {0x1F1D, {0x0395, 0x0314, 0x0301}}, // 'Ἕ' + {0x1F20, {0x03B7, 0x0313}}, // 'ἠ' + {0x1F21, {0x03B7, 0x0314}}, // 'ἡ' + {0x1F22, {0x03B7, 0x0313, 0x0300}}, // 'ἢ' + {0x1F23, {0x03B7, 0x0314, 0x0300}}, // 'ἣ' + {0x1F24, {0x03B7, 0x0313, 0x0301}}, // 'ἤ' + {0x1F25, {0x03B7, 0x0314, 0x0301}}, // 'ἥ' + {0x1F26, {0x03B7, 0x0313, 0x0342}}, // 'ἦ' + {0x1F27, {0x03B7, 0x0314, 0x0342}}, // 'ἧ' + {0x1F28, {0x0397, 0x0313}}, // 'Ἠ' + {0x1F29, {0x0397, 0x0314}}, // 'Ἡ' + {0x1F2A, {0x0397, 0x0313, 0x0300}}, // 'Ἢ' + {0x1F2B, {0x0397, 0x0314, 0x0300}}, // 'Ἣ' + {0x1F2C, {0x0397, 0x0313, 0x0301}}, // 'Ἤ' + {0x1F2D, {0x0397, 0x0314, 0x0301}}, // 'Ἥ' + {0x1F2E, {0x0397, 0x0313, 0x0342}}, // 'Ἦ' + {0x1F2F, {0x0397, 0x0314, 0x0342}}, // 'Ἧ' + {0x1F30, {0x03B9, 0x0313}}, // 'ἰ' + {0x1F31, {0x03B9, 0x0314}}, // 'ἱ' + {0x1F32, {0x03B9, 0x0313, 0x0300}}, // 'ἲ' + {0x1F33, {0x03B9, 0x0314, 0x0300}}, // 'ἳ' + {0x1F34, {0x03B9, 0x0313, 0x0301}}, // 'ἴ' + {0x1F35, {0x03B9, 0x0314, 0x0301}}, // 'ἵ' + {0x1F36, {0x03B9, 0x0313, 0x0342}}, // 'ἶ' + {0x1F37, {0x03B9, 0x0314, 0x0342}}, // 'ἷ' + {0x1F38, {0x0399, 0x0313}}, // 'Ἰ' + {0x1F39, {0x0399, 0x0314}}, // 'Ἱ' + {0x1F3A, {0x0399, 0x0313, 0x0300}}, // 'Ἲ' + {0x1F3B, {0x0399, 0x0314, 0x0300}}, // 'Ἳ' + {0x1F3C, {0x0399, 0x0313, 0x0301}}, // 'Ἴ' + {0x1F3D, {0x0399, 0x0314, 0x0301}}, // 'Ἵ' + {0x1F3E, {0x0399, 0x0313, 0x0342}}, // 'Ἶ' + {0x1F3F, {0x0399, 0x0314, 0x0342}}, // 'Ἷ' + {0x1F40, {0x03BF, 0x0313}}, // 'ὀ' + {0x1F41, {0x03BF, 0x0314}}, // 'ὁ' + {0x1F42, {0x03BF, 0x0313, 0x0300}}, // 'ὂ' + {0x1F43, {0x03BF, 0x0314, 0x0300}}, // 'ὃ' + {0x1F44, {0x03BF, 0x0313, 0x0301}}, // 'ὄ' + {0x1F45, {0x03BF, 0x0314, 0x0301}}, // 'ὅ' + {0x1F48, {0x039F, 0x0313}}, // 'Ὀ' + {0x1F49, {0x039F, 0x0314}}, // 'Ὁ' + {0x1F4A, {0x039F, 0x0313, 0x0300}}, // 'Ὂ' + {0x1F4B, {0x039F, 0x0314, 0x0300}}, // 'Ὃ' + {0x1F4C, {0x039F, 0x0313, 0x0301}}, // 'Ὄ' + {0x1F4D, {0x039F, 0x0314, 0x0301}}, // 'Ὅ' + {0x1F50, {0x03C5, 0x0313}}, // 'ὐ' + {0x1F51, {0x03C5, 0x0314}}, // 'ὑ' + {0x1F52, {0x03C5, 0x0313, 0x0300}}, // 'ὒ' + {0x1F53, {0x03C5, 0x0314, 0x0300}}, // 'ὓ' + {0x1F54, {0x03C5, 0x0313, 0x0301}}, // 'ὔ' + {0x1F55, {0x03C5, 0x0314, 0x0301}}, // 'ὕ' + {0x1F56, {0x03C5, 0x0313, 0x0342}}, // 'ὖ' + {0x1F57, {0x03C5, 0x0314, 0x0342}}, // 'ὗ' + {0x1F59, {0x03A5, 0x0314}}, // 'Ὑ' + {0x1F5B, {0x03A5, 0x0314, 0x0300}}, // 'Ὓ' + {0x1F5D, {0x03A5, 0x0314, 0x0301}}, // 'Ὕ' + {0x1F5F, {0x03A5, 0x0314, 0x0342}}, // 'Ὗ' + {0x1F60, {0x03C9, 0x0313}}, // 'ὠ' + {0x1F61, {0x03C9, 0x0314}}, // 'ὡ' + {0x1F62, {0x03C9, 0x0313, 0x0300}}, // 'ὢ' + {0x1F63, {0x03C9, 0x0314, 0x0300}}, // 'ὣ' + {0x1F64, {0x03C9, 0x0313, 0x0301}}, // 'ὤ' + {0x1F65, {0x03C9, 0x0314, 0x0301}}, // 'ὥ' + {0x1F66, {0x03C9, 0x0313, 0x0342}}, // 'ὦ' + {0x1F67, {0x03C9, 0x0314, 0x0342}}, // 'ὧ' + {0x1F68, {0x03A9, 0x0313}}, // 'Ὠ' + {0x1F69, {0x03A9, 0x0314}}, // 'Ὡ' + {0x1F6A, {0x03A9, 0x0313, 0x0300}}, // 'Ὢ' + {0x1F6B, {0x03A9, 0x0314, 0x0300}}, // 'Ὣ' + {0x1F6C, {0x03A9, 0x0313, 0x0301}}, // 'Ὤ' + {0x1F6D, {0x03A9, 0x0314, 0x0301}}, // 'Ὥ' + {0x1F6E, {0x03A9, 0x0313, 0x0342}}, // 'Ὦ' + {0x1F6F, {0x03A9, 0x0314, 0x0342}}, // 'Ὧ' + {0x1F70, {0x03B1, 0x0300}}, // 'ὰ' + {0x1F71, {0x03B1, 0x0301}}, // 'ά' + {0x1F72, {0x03B5, 0x0300}}, // 'ὲ' + {0x1F73, {0x03B5, 0x0301}}, // 'έ' + {0x1F74, {0x03B7, 0x0300}}, // 'ὴ' + {0x1F75, {0x03B7, 0x0301}}, // 'ή' + {0x1F76, {0x03B9, 0x0300}}, // 'ὶ' + {0x1F77, {0x03B9, 0x0301}}, // 'ί' + {0x1F78, {0x03BF, 0x0300}}, // 'ὸ' + {0x1F79, {0x03BF, 0x0301}}, // 'ό' + {0x1F7A, {0x03C5, 0x0300}}, // 'ὺ' + {0x1F7B, {0x03C5, 0x0301}}, // 'ύ' + {0x1F7C, {0x03C9, 0x0300}}, // 'ὼ' + {0x1F7D, {0x03C9, 0x0301}}, // 'ώ' + {0x1F80, {0x03B1, 0x0313, 0x0345}}, // 'ᾀ' + {0x1F81, {0x03B1, 0x0314, 0x0345}}, // 'ᾁ' + {0x1F82, {0x03B1, 0x0313, 0x0300, 0x0345}}, // 'ᾂ' + {0x1F83, {0x03B1, 0x0314, 0x0300, 0x0345}}, // 'ᾃ' + {0x1F84, {0x03B1, 0x0313, 0x0301, 0x0345}}, // 'ᾄ' + {0x1F85, {0x03B1, 0x0314, 0x0301, 0x0345}}, // 'ᾅ' + {0x1F86, {0x03B1, 0x0313, 0x0342, 0x0345}}, // 'ᾆ' + {0x1F87, {0x03B1, 0x0314, 0x0342, 0x0345}}, // 'ᾇ' + {0x1F88, {0x0391, 0x0313, 0x0345}}, // 'ᾈ' + {0x1F89, {0x0391, 0x0314, 0x0345}}, // 'ᾉ' + {0x1F8A, {0x0391, 0x0313, 0x0300, 0x0345}}, // 'ᾊ' + {0x1F8B, {0x0391, 0x0314, 0x0300, 0x0345}}, // 'ᾋ' + {0x1F8C, {0x0391, 0x0313, 0x0301, 0x0345}}, // 'ᾌ' + {0x1F8D, {0x0391, 0x0314, 0x0301, 0x0345}}, // 'ᾍ' + {0x1F8E, {0x0391, 0x0313, 0x0342, 0x0345}}, // 'ᾎ' + {0x1F8F, {0x0391, 0x0314, 0x0342, 0x0345}}, // 'ᾏ' + {0x1F90, {0x03B7, 0x0313, 0x0345}}, // 'ᾐ' + {0x1F91, {0x03B7, 0x0314, 0x0345}}, // 'ᾑ' + {0x1F92, {0x03B7, 0x0313, 0x0300, 0x0345}}, // 'ᾒ' + {0x1F93, {0x03B7, 0x0314, 0x0300, 0x0345}}, // 'ᾓ' + {0x1F94, {0x03B7, 0x0313, 0x0301, 0x0345}}, // 'ᾔ' + {0x1F95, {0x03B7, 0x0314, 0x0301, 0x0345}}, // 'ᾕ' + {0x1F96, {0x03B7, 0x0313, 0x0342, 0x0345}}, // 'ᾖ' + {0x1F97, {0x03B7, 0x0314, 0x0342, 0x0345}}, // 'ᾗ' + {0x1F98, {0x0397, 0x0313, 0x0345}}, // 'ᾘ' + {0x1F99, {0x0397, 0x0314, 0x0345}}, // 'ᾙ' + {0x1F9A, {0x0397, 0x0313, 0x0300, 0x0345}}, // 'ᾚ' + {0x1F9B, {0x0397, 0x0314, 0x0300, 0x0345}}, // 'ᾛ' + {0x1F9C, {0x0397, 0x0313, 0x0301, 0x0345}}, // 'ᾜ' + {0x1F9D, {0x0397, 0x0314, 0x0301, 0x0345}}, // 'ᾝ' + {0x1F9E, {0x0397, 0x0313, 0x0342, 0x0345}}, // 'ᾞ' + {0x1F9F, {0x0397, 0x0314, 0x0342, 0x0345}}, // 'ᾟ' + {0x1FA0, {0x03C9, 0x0313, 0x0345}}, // 'ᾠ' + {0x1FA1, {0x03C9, 0x0314, 0x0345}}, // 'ᾡ' + {0x1FA2, {0x03C9, 0x0313, 0x0300, 0x0345}}, // 'ᾢ' + {0x1FA3, {0x03C9, 0x0314, 0x0300, 0x0345}}, // 'ᾣ' + {0x1FA4, {0x03C9, 0x0313, 0x0301, 0x0345}}, // 'ᾤ' + {0x1FA5, {0x03C9, 0x0314, 0x0301, 0x0345}}, // 'ᾥ' + {0x1FA6, {0x03C9, 0x0313, 0x0342, 0x0345}}, // 'ᾦ' + {0x1FA7, {0x03C9, 0x0314, 0x0342, 0x0345}}, // 'ᾧ' + {0x1FA8, {0x03A9, 0x0313, 0x0345}}, // 'ᾨ' + {0x1FA9, {0x03A9, 0x0314, 0x0345}}, // 'ᾩ' + {0x1FAA, {0x03A9, 0x0313, 0x0300, 0x0345}}, // 'ᾪ' + {0x1FAB, {0x03A9, 0x0314, 0x0300, 0x0345}}, // 'ᾫ' + {0x1FAC, {0x03A9, 0x0313, 0x0301, 0x0345}}, // 'ᾬ' + {0x1FAD, {0x03A9, 0x0314, 0x0301, 0x0345}}, // 'ᾭ' + {0x1FAE, {0x03A9, 0x0313, 0x0342, 0x0345}}, // 'ᾮ' + {0x1FAF, {0x03A9, 0x0314, 0x0342, 0x0345}}, // 'ᾯ' + {0x1FB0, {0x03B1, 0x0306}}, // 'ᾰ' + {0x1FB1, {0x03B1, 0x0304}}, // 'ᾱ' + {0x1FB2, {0x03B1, 0x0300, 0x0345}}, // 'ᾲ' + {0x1FB3, {0x03B1, 0x0345}}, // 'ᾳ' + {0x1FB4, {0x03B1, 0x0301, 0x0345}}, // 'ᾴ' + {0x1FB6, {0x03B1, 0x0342}}, // 'ᾶ' + {0x1FB7, {0x03B1, 0x0342, 0x0345}}, // 'ᾷ' + {0x1FB8, {0x0391, 0x0306}}, // 'Ᾰ' + {0x1FB9, {0x0391, 0x0304}}, // 'Ᾱ' + {0x1FBA, {0x0391, 0x0300}}, // 'Ὰ' + {0x1FBB, {0x0391, 0x0301}}, // 'Ά' + {0x1FBC, {0x0391, 0x0345}}, // 'ᾼ' + {0x1FBE, {0x03B9}}, // 'ι' + {0x1FC1, {0x00A8, 0x0342}}, // '῁' + {0x1FC2, {0x03B7, 0x0300, 0x0345}}, // 'ῂ' + {0x1FC3, {0x03B7, 0x0345}}, // 'ῃ' + {0x1FC4, {0x03B7, 0x0301, 0x0345}}, // 'ῄ' + {0x1FC6, {0x03B7, 0x0342}}, // 'ῆ' + {0x1FC7, {0x03B7, 0x0342, 0x0345}}, // 'ῇ' + {0x1FC8, {0x0395, 0x0300}}, // 'Ὲ' + {0x1FC9, {0x0395, 0x0301}}, // 'Έ' + {0x1FCA, {0x0397, 0x0300}}, // 'Ὴ' + {0x1FCB, {0x0397, 0x0301}}, // 'Ή' + {0x1FCC, {0x0397, 0x0345}}, // 'ῌ' + {0x1FCD, {0x1FBF, 0x0300}}, // '῍' + {0x1FCE, {0x1FBF, 0x0301}}, // '῎' + {0x1FCF, {0x1FBF, 0x0342}}, // '῏' + {0x1FD0, {0x03B9, 0x0306}}, // 'ῐ' + {0x1FD1, {0x03B9, 0x0304}}, // 'ῑ' + {0x1FD2, {0x03B9, 0x0308, 0x0300}}, // 'ῒ' + {0x1FD3, {0x03B9, 0x0308, 0x0301}}, // 'ΐ' + {0x1FD6, {0x03B9, 0x0342}}, // 'ῖ' + {0x1FD7, {0x03B9, 0x0308, 0x0342}}, // 'ῗ' + {0x1FD8, {0x0399, 0x0306}}, // 'Ῐ' + {0x1FD9, {0x0399, 0x0304}}, // 'Ῑ' + {0x1FDA, {0x0399, 0x0300}}, // 'Ὶ' + {0x1FDB, {0x0399, 0x0301}}, // 'Ί' + {0x1FDD, {0x1FFE, 0x0300}}, // '῝' + {0x1FDE, {0x1FFE, 0x0301}}, // '῞' + {0x1FDF, {0x1FFE, 0x0342}}, // '῟' + {0x1FE0, {0x03C5, 0x0306}}, // 'ῠ' + {0x1FE1, {0x03C5, 0x0304}}, // 'ῡ' + {0x1FE2, {0x03C5, 0x0308, 0x0300}}, // 'ῢ' + {0x1FE3, {0x03C5, 0x0308, 0x0301}}, // 'ΰ' + {0x1FE4, {0x03C1, 0x0313}}, // 'ῤ' + {0x1FE5, {0x03C1, 0x0314}}, // 'ῥ' + {0x1FE6, {0x03C5, 0x0342}}, // 'ῦ' + {0x1FE7, {0x03C5, 0x0308, 0x0342}}, // 'ῧ' + {0x1FE8, {0x03A5, 0x0306}}, // 'Ῠ' + {0x1FE9, {0x03A5, 0x0304}}, // 'Ῡ' + {0x1FEA, {0x03A5, 0x0300}}, // 'Ὺ' + {0x1FEB, {0x03A5, 0x0301}}, // 'Ύ' + {0x1FEC, {0x03A1, 0x0314}}, // 'Ῥ' + {0x1FED, {0x00A8, 0x0300}}, // '῭' + {0x1FEE, {0x00A8, 0x0301}}, // '΅' + {0x1FEF, {0x0060}}, // '`' + {0x1FF2, {0x03C9, 0x0300, 0x0345}}, // 'ῲ' + {0x1FF3, {0x03C9, 0x0345}}, // 'ῳ' + {0x1FF4, {0x03C9, 0x0301, 0x0345}}, // 'ῴ' + {0x1FF6, {0x03C9, 0x0342}}, // 'ῶ' + {0x1FF7, {0x03C9, 0x0342, 0x0345}}, // 'ῷ' + {0x1FF8, {0x039F, 0x0300}}, // 'Ὸ' + {0x1FF9, {0x039F, 0x0301}}, // 'Ό' + {0x1FFA, {0x03A9, 0x0300}}, // 'Ὼ' + {0x1FFB, {0x03A9, 0x0301}}, // 'Ώ' + {0x1FFC, {0x03A9, 0x0345}}, // 'ῼ' + {0x1FFD, {0x00B4}}, // '´' + {0x2000, {0x2002}}, + {0x2001, {0x2003}}, + {0x2126, {0x03A9}}, // 'Ω' + {0x212A, {0x004B}}, // 'K' + {0x212B, {0x0041, 0x030A}}, // 'Å' + {0x219A, {0x2190, 0x0338}}, // '↚' + {0x219B, {0x2192, 0x0338}}, // '↛' + {0x21AE, {0x2194, 0x0338}}, // '↮' + {0x21CD, {0x21D0, 0x0338}}, // '⇍' + {0x21CE, {0x21D4, 0x0338}}, // '⇎' + {0x21CF, {0x21D2, 0x0338}}, // '⇏' + {0x2204, {0x2203, 0x0338}}, // '∄' + {0x2209, {0x2208, 0x0338}}, // '∉' + {0x220C, {0x220B, 0x0338}}, // '∌' + {0x2224, {0x2223, 0x0338}}, // '∤' + {0x2226, {0x2225, 0x0338}}, // '∦' + {0x2241, {0x223C, 0x0338}}, // '≁' + {0x2244, {0x2243, 0x0338}}, // '≄' + {0x2247, {0x2245, 0x0338}}, // '≇' + {0x2249, {0x2248, 0x0338}}, // '≉' + {0x2260, {0x003D, 0x0338}}, // '≠' + {0x2262, {0x2261, 0x0338}}, // '≢' + {0x226D, {0x224D, 0x0338}}, // '≭' + {0x226E, {0x003C, 0x0338}}, // '≮' + {0x226F, {0x003E, 0x0338}}, // '≯' + {0x2270, {0x2264, 0x0338}}, // '≰' + {0x2271, {0x2265, 0x0338}}, // '≱' + {0x2274, {0x2272, 0x0338}}, // '≴' + {0x2275, {0x2273, 0x0338}}, // '≵' + {0x2278, {0x2276, 0x0338}}, // '≸' + {0x2279, {0x2277, 0x0338}}, // '≹' + {0x2280, {0x227A, 0x0338}}, // '⊀' + {0x2281, {0x227B, 0x0338}}, // '⊁' + {0x2284, {0x2282, 0x0338}}, // '⊄' + {0x2285, {0x2283, 0x0338}}, // '⊅' + {0x2288, {0x2286, 0x0338}}, // '⊈' + {0x2289, {0x2287, 0x0338}}, // '⊉' + {0x22AC, {0x22A2, 0x0338}}, // '⊬' + {0x22AD, {0x22A8, 0x0338}}, // '⊭' + {0x22AE, {0x22A9, 0x0338}}, // '⊮' + {0x22AF, {0x22AB, 0x0338}}, // '⊯' + {0x22E0, {0x227C, 0x0338}}, // '⋠' + {0x22E1, {0x227D, 0x0338}}, // '⋡' + {0x22E2, {0x2291, 0x0338}}, // '⋢' + {0x22E3, {0x2292, 0x0338}}, // '⋣' + {0x22EA, {0x22B2, 0x0338}}, // '⋪' + {0x22EB, {0x22B3, 0x0338}}, // '⋫' + {0x22EC, {0x22B4, 0x0338}}, // '⋬' + {0x22ED, {0x22B5, 0x0338}}, // '⋭' + {0x2329, {0x3008}}, // '〈' + {0x232A, {0x3009}}, // '〉' + {0x2ADC, {0x2ADD, 0x0338}}, // '⫝̸' + {0x304C, {0x304B, 0x3099}}, // 'が' + {0x304E, {0x304D, 0x3099}}, // 'ぎ' + {0x3050, {0x304F, 0x3099}}, // 'ぐ' + {0x3052, {0x3051, 0x3099}}, // 'げ' + {0x3054, {0x3053, 0x3099}}, // 'ご' + {0x3056, {0x3055, 0x3099}}, // 'ざ' + {0x3058, {0x3057, 0x3099}}, // 'じ' + {0x305A, {0x3059, 0x3099}}, // 'ず' + {0x305C, {0x305B, 0x3099}}, // 'ぜ' + {0x305E, {0x305D, 0x3099}}, // 'ぞ' + {0x3060, {0x305F, 0x3099}}, // 'だ' + {0x3062, {0x3061, 0x3099}}, // 'ぢ' + {0x3065, {0x3064, 0x3099}}, // 'づ' + {0x3067, {0x3066, 0x3099}}, // 'で' + {0x3069, {0x3068, 0x3099}}, // 'ど' + {0x3070, {0x306F, 0x3099}}, // 'ば' + {0x3071, {0x306F, 0x309A}}, // 'ぱ' + {0x3073, {0x3072, 0x3099}}, // 'び' + {0x3074, {0x3072, 0x309A}}, // 'ぴ' + {0x3076, {0x3075, 0x3099}}, // 'ぶ' + {0x3077, {0x3075, 0x309A}}, // 'ぷ' + {0x3079, {0x3078, 0x3099}}, // 'べ' + {0x307A, {0x3078, 0x309A}}, // 'ぺ' + {0x307C, {0x307B, 0x3099}}, // 'ぼ' + {0x307D, {0x307B, 0x309A}}, // 'ぽ' + {0x3094, {0x3046, 0x3099}}, // 'ゔ' + {0x309E, {0x309D, 0x3099}}, // 'ゞ' + {0x30AC, {0x30AB, 0x3099}}, // 'ガ' + {0x30AE, {0x30AD, 0x3099}}, // 'ギ' + {0x30B0, {0x30AF, 0x3099}}, // 'グ' + {0x30B2, {0x30B1, 0x3099}}, // 'ゲ' + {0x30B4, {0x30B3, 0x3099}}, // 'ゴ' + {0x30B6, {0x30B5, 0x3099}}, // 'ザ' + {0x30B8, {0x30B7, 0x3099}}, // 'ジ' + {0x30BA, {0x30B9, 0x3099}}, // 'ズ' + {0x30BC, {0x30BB, 0x3099}}, // 'ゼ' + {0x30BE, {0x30BD, 0x3099}}, // 'ゾ' + {0x30C0, {0x30BF, 0x3099}}, // 'ダ' + {0x30C2, {0x30C1, 0x3099}}, // 'ヂ' + {0x30C5, {0x30C4, 0x3099}}, // 'ヅ' + {0x30C7, {0x30C6, 0x3099}}, // 'デ' + {0x30C9, {0x30C8, 0x3099}}, // 'ド' + {0x30D0, {0x30CF, 0x3099}}, // 'バ' + {0x30D1, {0x30CF, 0x309A}}, // 'パ' + {0x30D3, {0x30D2, 0x3099}}, // 'ビ' + {0x30D4, {0x30D2, 0x309A}}, // 'ピ' + {0x30D6, {0x30D5, 0x3099}}, // 'ブ' + {0x30D7, {0x30D5, 0x309A}}, // 'プ' + {0x30D9, {0x30D8, 0x3099}}, // 'ベ' + {0x30DA, {0x30D8, 0x309A}}, // 'ペ' + {0x30DC, {0x30DB, 0x3099}}, // 'ボ' + {0x30DD, {0x30DB, 0x309A}}, // 'ポ' + {0x30F4, {0x30A6, 0x3099}}, // 'ヴ' + {0x30F7, {0x30EF, 0x3099}}, // 'ヷ' + {0x30F8, {0x30F0, 0x3099}}, // 'ヸ' + {0x30F9, {0x30F1, 0x3099}}, // 'ヹ' + {0x30FA, {0x30F2, 0x3099}}, // 'ヺ' + {0x30FE, {0x30FD, 0x3099}}, // 'ヾ' + {0xF900, {0x8C48}}, // '豈' + {0xF901, {0x66F4}}, // '更' + {0xF902, {0x8ECA}}, // '車' + {0xF903, {0x8CC8}}, // '賈' + {0xF904, {0x6ED1}}, // '滑' + {0xF905, {0x4E32}}, // '串' + {0xF906, {0x53E5}}, // '句' + {0xF907, {0x9F9C}}, // '龜' + {0xF908, {0x9F9C}}, // '龜' + {0xF909, {0x5951}}, // '契' + {0xF90A, {0x91D1}}, // '金' + {0xF90B, {0x5587}}, // '喇' + {0xF90C, {0x5948}}, // '奈' + {0xF90D, {0x61F6}}, // '懶' + {0xF90E, {0x7669}}, // '癩' + {0xF90F, {0x7F85}}, // '羅' + {0xF910, {0x863F}}, // '蘿' + {0xF911, {0x87BA}}, // '螺' + {0xF912, {0x88F8}}, // '裸' + {0xF913, {0x908F}}, // '邏' + {0xF914, {0x6A02}}, // '樂' + {0xF915, {0x6D1B}}, // '洛' + {0xF916, {0x70D9}}, // '烙' + {0xF917, {0x73DE}}, // '珞' + {0xF918, {0x843D}}, // '落' + {0xF919, {0x916A}}, // '酪' + {0xF91A, {0x99F1}}, // '駱' + {0xF91B, {0x4E82}}, // '亂' + {0xF91C, {0x5375}}, // '卵' + {0xF91D, {0x6B04}}, // '欄' + {0xF91E, {0x721B}}, // '爛' + {0xF91F, {0x862D}}, // '蘭' + {0xF920, {0x9E1E}}, // '鸞' + {0xF921, {0x5D50}}, // '嵐' + {0xF922, {0x6FEB}}, // '濫' + {0xF923, {0x85CD}}, // '藍' + {0xF924, {0x8964}}, // '襤' + {0xF925, {0x62C9}}, // '拉' + {0xF926, {0x81D8}}, // '臘' + {0xF927, {0x881F}}, // '蠟' + {0xF928, {0x5ECA}}, // '廊' + {0xF929, {0x6717}}, // '朗' + {0xF92A, {0x6D6A}}, // '浪' + {0xF92B, {0x72FC}}, // '狼' + {0xF92C, {0x90CE}}, // '郎' + {0xF92D, {0x4F86}}, // '來' + {0xF92E, {0x51B7}}, // '冷' + {0xF92F, {0x52DE}}, // '勞' + {0xF930, {0x64C4}}, // '擄' + {0xF931, {0x6AD3}}, // '櫓' + {0xF932, {0x7210}}, // '爐' + {0xF933, {0x76E7}}, // '盧' + {0xF934, {0x8001}}, // '老' + {0xF935, {0x8606}}, // '蘆' + {0xF936, {0x865C}}, // '虜' + {0xF937, {0x8DEF}}, // '路' + {0xF938, {0x9732}}, // '露' + {0xF939, {0x9B6F}}, // '魯' + {0xF93A, {0x9DFA}}, // '鷺' + {0xF93B, {0x788C}}, // '碌' + {0xF93C, {0x797F}}, // '祿' + {0xF93D, {0x7DA0}}, // '綠' + {0xF93E, {0x83C9}}, // '菉' + {0xF93F, {0x9304}}, // '錄' + {0xF940, {0x9E7F}}, // '鹿' + {0xF941, {0x8AD6}}, // '論' + {0xF942, {0x58DF}}, // '壟' + {0xF943, {0x5F04}}, // '弄' + {0xF944, {0x7C60}}, // '籠' + {0xF945, {0x807E}}, // '聾' + {0xF946, {0x7262}}, // '牢' + {0xF947, {0x78CA}}, // '磊' + {0xF948, {0x8CC2}}, // '賂' + {0xF949, {0x96F7}}, // '雷' + {0xF94A, {0x58D8}}, // '壘' + {0xF94B, {0x5C62}}, // '屢' + {0xF94C, {0x6A13}}, // '樓' + {0xF94D, {0x6DDA}}, // '淚' + {0xF94E, {0x6F0F}}, // '漏' + {0xF94F, {0x7D2F}}, // '累' + {0xF950, {0x7E37}}, // '縷' + {0xF951, {0x964B}}, // '陋' + {0xF952, {0x52D2}}, // '勒' + {0xF953, {0x808B}}, // '肋' + {0xF954, {0x51DC}}, // '凜' + {0xF955, {0x51CC}}, // '凌' + {0xF956, {0x7A1C}}, // '稜' + {0xF957, {0x7DBE}}, // '綾' + {0xF958, {0x83F1}}, // '菱' + {0xF959, {0x9675}}, // '陵' + {0xF95A, {0x8B80}}, // '讀' + {0xF95B, {0x62CF}}, // '拏' + {0xF95C, {0x6A02}}, // '樂' + {0xF95D, {0x8AFE}}, // '諾' + {0xF95E, {0x4E39}}, // '丹' + {0xF95F, {0x5BE7}}, // '寧' + {0xF960, {0x6012}}, // '怒' + {0xF961, {0x7387}}, // '率' + {0xF962, {0x7570}}, // '異' + {0xF963, {0x5317}}, // '北' + {0xF964, {0x78FB}}, // '磻' + {0xF965, {0x4FBF}}, // '便' + {0xF966, {0x5FA9}}, // '復' + {0xF967, {0x4E0D}}, // '不' + {0xF968, {0x6CCC}}, // '泌' + {0xF969, {0x6578}}, // '數' + {0xF96A, {0x7D22}}, // '索' + {0xF96B, {0x53C3}}, // '參' + {0xF96C, {0x585E}}, // '塞' + {0xF96D, {0x7701}}, // '省' + {0xF96E, {0x8449}}, // '葉' + {0xF96F, {0x8AAA}}, // '說' + {0xF970, {0x6BBA}}, // '殺' + {0xF971, {0x8FB0}}, // '辰' + {0xF972, {0x6C88}}, // '沈' + {0xF973, {0x62FE}}, // '拾' + {0xF974, {0x82E5}}, // '若' + {0xF975, {0x63A0}}, // '掠' + {0xF976, {0x7565}}, // '略' + {0xF977, {0x4EAE}}, // '亮' + {0xF978, {0x5169}}, // '兩' + {0xF979, {0x51C9}}, // '凉' + {0xF97A, {0x6881}}, // '梁' + {0xF97B, {0x7CE7}}, // '糧' + {0xF97C, {0x826F}}, // '良' + {0xF97D, {0x8AD2}}, // '諒' + {0xF97E, {0x91CF}}, // '量' + {0xF97F, {0x52F5}}, // '勵' + {0xF980, {0x5442}}, // '呂' + {0xF981, {0x5973}}, // '女' + {0xF982, {0x5EEC}}, // '廬' + {0xF983, {0x65C5}}, // '旅' + {0xF984, {0x6FFE}}, // '濾' + {0xF985, {0x792A}}, // '礪' + {0xF986, {0x95AD}}, // '閭' + {0xF987, {0x9A6A}}, // '驪' + {0xF988, {0x9E97}}, // '麗' + {0xF989, {0x9ECE}}, // '黎' + {0xF98A, {0x529B}}, // '力' + {0xF98B, {0x66C6}}, // '曆' + {0xF98C, {0x6B77}}, // '歷' + {0xF98D, {0x8F62}}, // '轢' + {0xF98E, {0x5E74}}, // '年' + {0xF98F, {0x6190}}, // '憐' + {0xF990, {0x6200}}, // '戀' + {0xF991, {0x649A}}, // '撚' + {0xF992, {0x6F23}}, // '漣' + {0xF993, {0x7149}}, // '煉' + {0xF994, {0x7489}}, // '璉' + {0xF995, {0x79CA}}, // '秊' + {0xF996, {0x7DF4}}, // '練' + {0xF997, {0x806F}}, // '聯' + {0xF998, {0x8F26}}, // '輦' + {0xF999, {0x84EE}}, // '蓮' + {0xF99A, {0x9023}}, // '連' + {0xF99B, {0x934A}}, // '鍊' + {0xF99C, {0x5217}}, // '列' + {0xF99D, {0x52A3}}, // '劣' + {0xF99E, {0x54BD}}, // '咽' + {0xF99F, {0x70C8}}, // '烈' + {0xF9A0, {0x88C2}}, // '裂' + {0xF9A1, {0x8AAA}}, // '說' + {0xF9A2, {0x5EC9}}, // '廉' + {0xF9A3, {0x5FF5}}, // '念' + {0xF9A4, {0x637B}}, // '捻' + {0xF9A5, {0x6BAE}}, // '殮' + {0xF9A6, {0x7C3E}}, // '簾' + {0xF9A7, {0x7375}}, // '獵' + {0xF9A8, {0x4EE4}}, // '令' + {0xF9A9, {0x56F9}}, // '囹' + {0xF9AA, {0x5BE7}}, // '寧' + {0xF9AB, {0x5DBA}}, // '嶺' + {0xF9AC, {0x601C}}, // '怜' + {0xF9AD, {0x73B2}}, // '玲' + {0xF9AE, {0x7469}}, // '瑩' + {0xF9AF, {0x7F9A}}, // '羚' + {0xF9B0, {0x8046}}, // '聆' + {0xF9B1, {0x9234}}, // '鈴' + {0xF9B2, {0x96F6}}, // '零' + {0xF9B3, {0x9748}}, // '靈' + {0xF9B4, {0x9818}}, // '領' + {0xF9B5, {0x4F8B}}, // '例' + {0xF9B6, {0x79AE}}, // '禮' + {0xF9B7, {0x91B4}}, // '醴' + {0xF9B8, {0x96B8}}, // '隸' + {0xF9B9, {0x60E1}}, // '惡' + {0xF9BA, {0x4E86}}, // '了' + {0xF9BB, {0x50DA}}, // '僚' + {0xF9BC, {0x5BEE}}, // '寮' + {0xF9BD, {0x5C3F}}, // '尿' + {0xF9BE, {0x6599}}, // '料' + {0xF9BF, {0x6A02}}, // '樂' + {0xF9C0, {0x71CE}}, // '燎' + {0xF9C1, {0x7642}}, // '療' + {0xF9C2, {0x84FC}}, // '蓼' + {0xF9C3, {0x907C}}, // '遼' + {0xF9C4, {0x9F8D}}, // '龍' + {0xF9C5, {0x6688}}, // '暈' + {0xF9C6, {0x962E}}, // '阮' + {0xF9C7, {0x5289}}, // '劉' + {0xF9C8, {0x677B}}, // '杻' + {0xF9C9, {0x67F3}}, // '柳' + {0xF9CA, {0x6D41}}, // '流' + {0xF9CB, {0x6E9C}}, // '溜' + {0xF9CC, {0x7409}}, // '琉' + {0xF9CD, {0x7559}}, // '留' + {0xF9CE, {0x786B}}, // '硫' + {0xF9CF, {0x7D10}}, // '紐' + {0xF9D0, {0x985E}}, // '類' + {0xF9D1, {0x516D}}, // '六' + {0xF9D2, {0x622E}}, // '戮' + {0xF9D3, {0x9678}}, // '陸' + {0xF9D4, {0x502B}}, // '倫' + {0xF9D5, {0x5D19}}, // '崙' + {0xF9D6, {0x6DEA}}, // '淪' + {0xF9D7, {0x8F2A}}, // '輪' + {0xF9D8, {0x5F8B}}, // '律' + {0xF9D9, {0x6144}}, // '慄' + {0xF9DA, {0x6817}}, // '栗' + {0xF9DB, {0x7387}}, // '率' + {0xF9DC, {0x9686}}, // '隆' + {0xF9DD, {0x5229}}, // '利' + {0xF9DE, {0x540F}}, // '吏' + {0xF9DF, {0x5C65}}, // '履' + {0xF9E0, {0x6613}}, // '易' + {0xF9E1, {0x674E}}, // '李' + {0xF9E2, {0x68A8}}, // '梨' + {0xF9E3, {0x6CE5}}, // '泥' + {0xF9E4, {0x7406}}, // '理' + {0xF9E5, {0x75E2}}, // '痢' + {0xF9E6, {0x7F79}}, // '罹' + {0xF9E7, {0x88CF}}, // '裏' + {0xF9E8, {0x88E1}}, // '裡' + {0xF9E9, {0x91CC}}, // '里' + {0xF9EA, {0x96E2}}, // '離' + {0xF9EB, {0x533F}}, // '匿' + {0xF9EC, {0x6EBA}}, // '溺' + {0xF9ED, {0x541D}}, // '吝' + {0xF9EE, {0x71D0}}, // '燐' + {0xF9EF, {0x7498}}, // '璘' + {0xF9F0, {0x85FA}}, // '藺' + {0xF9F1, {0x96A3}}, // '隣' + {0xF9F2, {0x9C57}}, // '鱗' + {0xF9F3, {0x9E9F}}, // '麟' + {0xF9F4, {0x6797}}, // '林' + {0xF9F5, {0x6DCB}}, // '淋' + {0xF9F6, {0x81E8}}, // '臨' + {0xF9F7, {0x7ACB}}, // '立' + {0xF9F8, {0x7B20}}, // '笠' + {0xF9F9, {0x7C92}}, // '粒' + {0xF9FA, {0x72C0}}, // '狀' + {0xF9FB, {0x7099}}, // '炙' + {0xF9FC, {0x8B58}}, // '識' + {0xF9FD, {0x4EC0}}, // '什' + {0xF9FE, {0x8336}}, // '茶' + {0xF9FF, {0x523A}}, // '刺' + {0xFA00, {0x5207}}, // '切' + {0xFA01, {0x5EA6}}, // '度' + {0xFA02, {0x62D3}}, // '拓' + {0xFA03, {0x7CD6}}, // '糖' + {0xFA04, {0x5B85}}, // '宅' + {0xFA05, {0x6D1E}}, // '洞' + {0xFA06, {0x66B4}}, // '暴' + {0xFA07, {0x8F3B}}, // '輻' + {0xFA08, {0x884C}}, // '行' + {0xFA09, {0x964D}}, // '降' + {0xFA0A, {0x898B}}, // '見' + {0xFA0B, {0x5ED3}}, // '廓' + {0xFA0C, {0x5140}}, // '兀' + {0xFA0D, {0x55C0}}, // '嗀' + {0xFA10, {0x585A}}, // '塚' + {0xFA12, {0x6674}}, // '晴' + {0xFA15, {0x51DE}}, // '凞' + {0xFA16, {0x732A}}, // '猪' + {0xFA17, {0x76CA}}, // '益' + {0xFA18, {0x793C}}, // '礼' + {0xFA19, {0x795E}}, // '神' + {0xFA1A, {0x7965}}, // '祥' + {0xFA1B, {0x798F}}, // '福' + {0xFA1C, {0x9756}}, // '靖' + {0xFA1D, {0x7CBE}}, // '精' + {0xFA1E, {0x7FBD}}, // '羽' + {0xFA20, {0x8612}}, // '蘒' + {0xFA22, {0x8AF8}}, // '諸' + {0xFA25, {0x9038}}, // '逸' + {0xFA26, {0x90FD}}, // '都' + {0xFA2A, {0x98EF}}, // '飯' + {0xFA2B, {0x98FC}}, // '飼' + {0xFA2C, {0x9928}}, // '館' + {0xFA2D, {0x9DB4}}, // '鶴' + {0xFA2E, {0x90DE}}, // '郞' + {0xFA2F, {0x96B7}}, // '隷' + {0xFA30, {0x4FAE}}, // '侮' + {0xFA31, {0x50E7}}, // '僧' + {0xFA32, {0x514D}}, // '免' + {0xFA33, {0x52C9}}, // '勉' + {0xFA34, {0x52E4}}, // '勤' + {0xFA35, {0x5351}}, // '卑' + {0xFA36, {0x559D}}, // '喝' + {0xFA37, {0x5606}}, // '嘆' + {0xFA38, {0x5668}}, // '器' + {0xFA39, {0x5840}}, // '塀' + {0xFA3A, {0x58A8}}, // '墨' + {0xFA3B, {0x5C64}}, // '層' + {0xFA3C, {0x5C6E}}, // '屮' + {0xFA3D, {0x6094}}, // '悔' + {0xFA3E, {0x6168}}, // '慨' + {0xFA3F, {0x618E}}, // '憎' + {0xFA40, {0x61F2}}, // '懲' + {0xFA41, {0x654F}}, // '敏' + {0xFA42, {0x65E2}}, // '既' + {0xFA43, {0x6691}}, // '暑' + {0xFA44, {0x6885}}, // '梅' + {0xFA45, {0x6D77}}, // '海' + {0xFA46, {0x6E1A}}, // '渚' + {0xFA47, {0x6F22}}, // '漢' + {0xFA48, {0x716E}}, // '煮' + {0xFA49, {0x722B}}, // '爫' + {0xFA4A, {0x7422}}, // '琢' + {0xFA4B, {0x7891}}, // '碑' + {0xFA4C, {0x793E}}, // '社' + {0xFA4D, {0x7949}}, // '祉' + {0xFA4E, {0x7948}}, // '祈' + {0xFA4F, {0x7950}}, // '祐' + {0xFA50, {0x7956}}, // '祖' + {0xFA51, {0x795D}}, // '祝' + {0xFA52, {0x798D}}, // '禍' + {0xFA53, {0x798E}}, // '禎' + {0xFA54, {0x7A40}}, // '穀' + {0xFA55, {0x7A81}}, // '突' + {0xFA56, {0x7BC0}}, // '節' + {0xFA57, {0x7DF4}}, // '練' + {0xFA58, {0x7E09}}, // '縉' + {0xFA59, {0x7E41}}, // '繁' + {0xFA5A, {0x7F72}}, // '署' + {0xFA5B, {0x8005}}, // '者' + {0xFA5C, {0x81ED}}, // '臭' + {0xFA5D, {0x8279}}, // '艹' + {0xFA5E, {0x8279}}, // '艹' + {0xFA5F, {0x8457}}, // '著' + {0xFA60, {0x8910}}, // '褐' + {0xFA61, {0x8996}}, // '視' + {0xFA62, {0x8B01}}, // '謁' + {0xFA63, {0x8B39}}, // '謹' + {0xFA64, {0x8CD3}}, // '賓' + {0xFA65, {0x8D08}}, // '贈' + {0xFA66, {0x8FB6}}, // '辶' + {0xFA67, {0x9038}}, // '逸' + {0xFA68, {0x96E3}}, // '難' + {0xFA69, {0x97FF}}, // '響' + {0xFA6A, {0x983B}}, // '頻' + {0xFA6B, {0x6075}}, // '恵' + {0xFA6C, {0x242EE}}, // '𤋮' + {0xFA6D, {0x8218}}, // '舘' + {0xFA70, {0x4E26}}, // '並' + {0xFA71, {0x51B5}}, // '况' + {0xFA72, {0x5168}}, // '全' + {0xFA73, {0x4F80}}, // '侀' + {0xFA74, {0x5145}}, // '充' + {0xFA75, {0x5180}}, // '冀' + {0xFA76, {0x52C7}}, // '勇' + {0xFA77, {0x52FA}}, // '勺' + {0xFA78, {0x559D}}, // '喝' + {0xFA79, {0x5555}}, // '啕' + {0xFA7A, {0x5599}}, // '喙' + {0xFA7B, {0x55E2}}, // '嗢' + {0xFA7C, {0x585A}}, // '塚' + {0xFA7D, {0x58B3}}, // '墳' + {0xFA7E, {0x5944}}, // '奄' + {0xFA7F, {0x5954}}, // '奔' + {0xFA80, {0x5A62}}, // '婢' + {0xFA81, {0x5B28}}, // '嬨' + {0xFA82, {0x5ED2}}, // '廒' + {0xFA83, {0x5ED9}}, // '廙' + {0xFA84, {0x5F69}}, // '彩' + {0xFA85, {0x5FAD}}, // '徭' + {0xFA86, {0x60D8}}, // '惘' + {0xFA87, {0x614E}}, // '慎' + {0xFA88, {0x6108}}, // '愈' + {0xFA89, {0x618E}}, // '憎' + {0xFA8A, {0x6160}}, // '慠' + {0xFA8B, {0x61F2}}, // '懲' + {0xFA8C, {0x6234}}, // '戴' + {0xFA8D, {0x63C4}}, // '揄' + {0xFA8E, {0x641C}}, // '搜' + {0xFA8F, {0x6452}}, // '摒' + {0xFA90, {0x6556}}, // '敖' + {0xFA91, {0x6674}}, // '晴' + {0xFA92, {0x6717}}, // '朗' + {0xFA93, {0x671B}}, // '望' + {0xFA94, {0x6756}}, // '杖' + {0xFA95, {0x6B79}}, // '歹' + {0xFA96, {0x6BBA}}, // '殺' + {0xFA97, {0x6D41}}, // '流' + {0xFA98, {0x6EDB}}, // '滛' + {0xFA99, {0x6ECB}}, // '滋' + {0xFA9A, {0x6F22}}, // '漢' + {0xFA9B, {0x701E}}, // '瀞' + {0xFA9C, {0x716E}}, // '煮' + {0xFA9D, {0x77A7}}, // '瞧' + {0xFA9E, {0x7235}}, // '爵' + {0xFA9F, {0x72AF}}, // '犯' + {0xFAA0, {0x732A}}, // '猪' + {0xFAA1, {0x7471}}, // '瑱' + {0xFAA2, {0x7506}}, // '甆' + {0xFAA3, {0x753B}}, // '画' + {0xFAA4, {0x761D}}, // '瘝' + {0xFAA5, {0x761F}}, // '瘟' + {0xFAA6, {0x76CA}}, // '益' + {0xFAA7, {0x76DB}}, // '盛' + {0xFAA8, {0x76F4}}, // '直' + {0xFAA9, {0x774A}}, // '睊' + {0xFAAA, {0x7740}}, // '着' + {0xFAAB, {0x78CC}}, // '磌' + {0xFAAC, {0x7AB1}}, // '窱' + {0xFAAD, {0x7BC0}}, // '節' + {0xFAAE, {0x7C7B}}, // '类' + {0xFAAF, {0x7D5B}}, // '絛' + {0xFAB0, {0x7DF4}}, // '練' + {0xFAB1, {0x7F3E}}, // '缾' + {0xFAB2, {0x8005}}, // '者' + {0xFAB3, {0x8352}}, // '荒' + {0xFAB4, {0x83EF}}, // '華' + {0xFAB5, {0x8779}}, // '蝹' + {0xFAB6, {0x8941}}, // '襁' + {0xFAB7, {0x8986}}, // '覆' + {0xFAB8, {0x8996}}, // '視' + {0xFAB9, {0x8ABF}}, // '調' + {0xFABA, {0x8AF8}}, // '諸' + {0xFABB, {0x8ACB}}, // '請' + {0xFABC, {0x8B01}}, // '謁' + {0xFABD, {0x8AFE}}, // '諾' + {0xFABE, {0x8AED}}, // '諭' + {0xFABF, {0x8B39}}, // '謹' + {0xFAC0, {0x8B8A}}, // '變' + {0xFAC1, {0x8D08}}, // '贈' + {0xFAC2, {0x8F38}}, // '輸' + {0xFAC3, {0x9072}}, // '遲' + {0xFAC4, {0x9199}}, // '醙' + {0xFAC5, {0x9276}}, // '鉶' + {0xFAC6, {0x967C}}, // '陼' + {0xFAC7, {0x96E3}}, // '難' + {0xFAC8, {0x9756}}, // '靖' + {0xFAC9, {0x97DB}}, // '韛' + {0xFACA, {0x97FF}}, // '響' + {0xFACB, {0x980B}}, // '頋' + {0xFACC, {0x983B}}, // '頻' + {0xFACD, {0x9B12}}, // '鬒' + {0xFACE, {0x9F9C}}, // '龜' + {0xFACF, {0x2284A}}, // '𢡊' + {0xFAD0, {0x22844}}, // '𢡄' + {0xFAD1, {0x233D5}}, // '𣏕' + {0xFAD2, {0x3B9D}}, // '㮝' + {0xFAD3, {0x4018}}, // '䀘' + {0xFAD4, {0x4039}}, // '䀹' + {0xFAD5, {0x25249}}, // '𥉉' + {0xFAD6, {0x25CD0}}, // '𥳐' + {0xFAD7, {0x27ED3}}, // '𧻓' + {0xFAD8, {0x9F43}}, // '齃' + {0xFAD9, {0x9F8E}}, // '龎' + {0xFB1D, {0x05D9, 0x05B4}}, // 'יִ' + {0xFB1F, {0x05F2, 0x05B7}}, // 'ײַ' + {0xFB2A, {0x05E9, 0x05C1}}, // 'שׁ' + {0xFB2B, {0x05E9, 0x05C2}}, // 'שׂ' + {0xFB2C, {0x05E9, 0x05BC, 0x05C1}}, // 'שּׁ' + {0xFB2D, {0x05E9, 0x05BC, 0x05C2}}, // 'שּׂ' + {0xFB2E, {0x05D0, 0x05B7}}, // 'אַ' + {0xFB2F, {0x05D0, 0x05B8}}, // 'אָ' + {0xFB30, {0x05D0, 0x05BC}}, // 'אּ' + {0xFB31, {0x05D1, 0x05BC}}, // 'בּ' + {0xFB32, {0x05D2, 0x05BC}}, // 'גּ' + {0xFB33, {0x05D3, 0x05BC}}, // 'דּ' + {0xFB34, {0x05D4, 0x05BC}}, // 'הּ' + {0xFB35, {0x05D5, 0x05BC}}, // 'וּ' + {0xFB36, {0x05D6, 0x05BC}}, // 'זּ' + {0xFB38, {0x05D8, 0x05BC}}, // 'טּ' + {0xFB39, {0x05D9, 0x05BC}}, // 'יּ' + {0xFB3A, {0x05DA, 0x05BC}}, // 'ךּ' + {0xFB3B, {0x05DB, 0x05BC}}, // 'כּ' + {0xFB3C, {0x05DC, 0x05BC}}, // 'לּ' + {0xFB3E, {0x05DE, 0x05BC}}, // 'מּ' + {0xFB40, {0x05E0, 0x05BC}}, // 'נּ' + {0xFB41, {0x05E1, 0x05BC}}, // 'סּ' + {0xFB43, {0x05E3, 0x05BC}}, // 'ףּ' + {0xFB44, {0x05E4, 0x05BC}}, // 'פּ' + {0xFB46, {0x05E6, 0x05BC}}, // 'צּ' + {0xFB47, {0x05E7, 0x05BC}}, // 'קּ' + {0xFB48, {0x05E8, 0x05BC}}, // 'רּ' + {0xFB49, {0x05E9, 0x05BC}}, // 'שּ' + {0xFB4A, {0x05EA, 0x05BC}}, // 'תּ' + {0xFB4B, {0x05D5, 0x05B9}}, // 'וֹ' + {0xFB4C, {0x05D1, 0x05BF}}, // 'בֿ' + {0xFB4D, {0x05DB, 0x05BF}}, // 'כֿ' + {0xFB4E, {0x05E4, 0x05BF}}, // 'פֿ' + {0x105C9, {0x105D2, 0x0307}}, + {0x105E4, {0x105DA, 0x0307}}, + {0x1109A, {0x11099, 0x110BA}}, // '𑂚' + {0x1109C, {0x1109B, 0x110BA}}, // '𑂜' + {0x110AB, {0x110A5, 0x110BA}}, // '𑂫' + {0x1112E, {0x11131, 0x11127}}, // '𑄮' + {0x1112F, {0x11132, 0x11127}}, // '𑄯' + {0x1134B, {0x11347, 0x1133E}}, // '𑍋' + {0x1134C, {0x11347, 0x11357}}, // '𑍌' + {0x11383, {0x11382, 0x113C9}}, + {0x11385, {0x11384, 0x113BB}}, + {0x1138E, {0x1138B, 0x113C2}}, + {0x11391, {0x11390, 0x113C9}}, + {0x113C5, {0x113C2, 0x113C2}}, + {0x113C7, {0x113C2, 0x113B8}}, + {0x113C8, {0x113C2, 0x113C9}}, + {0x114BB, {0x114B9, 0x114BA}}, // '𑒻' + {0x114BC, {0x114B9, 0x114B0}}, // '𑒼' + {0x114BE, {0x114B9, 0x114BD}}, // '𑒾' + {0x115BA, {0x115B8, 0x115AF}}, // '𑖺' + {0x115BB, {0x115B9, 0x115AF}}, // '𑖻' + {0x11938, {0x11935, 0x11930}}, // '𑤸' + {0x16121, {0x1611E, 0x1611E}}, + {0x16122, {0x1611E, 0x16129}}, + {0x16123, {0x1611E, 0x1611F}}, + {0x16124, {0x16129, 0x1611F}}, + {0x16125, {0x1611E, 0x16120}}, + {0x16126, {0x1611E, 0x1611E, 0x1611F}}, + {0x16127, {0x1611E, 0x16129, 0x1611F}}, + {0x16128, {0x1611E, 0x1611E, 0x16120}}, + {0x16D68, {0x16D67, 0x16D67}}, + {0x16D69, {0x16D63, 0x16D67}}, + {0x16D6A, {0x16D63, 0x16D67, 0x16D67}}, + {0x1D15E, {0x1D157, 0x1D165}}, // '𝅗𝅥' + {0x1D15F, {0x1D158, 0x1D165}}, // '𝅘𝅥' + {0x1D160, {0x1D158, 0x1D165, 0x1D16E}}, // '𝅘𝅥𝅮' + {0x1D161, {0x1D158, 0x1D165, 0x1D16F}}, // '𝅘𝅥𝅯' + {0x1D162, {0x1D158, 0x1D165, 0x1D170}}, // '𝅘𝅥𝅰' + {0x1D163, {0x1D158, 0x1D165, 0x1D171}}, // '𝅘𝅥𝅱' + {0x1D164, {0x1D158, 0x1D165, 0x1D172}}, // '𝅘𝅥𝅲' + {0x1D1BB, {0x1D1B9, 0x1D165}}, // '𝆹𝅥' + {0x1D1BC, {0x1D1BA, 0x1D165}}, // '𝆺𝅥' + {0x1D1BD, {0x1D1B9, 0x1D165, 0x1D16E}}, // '𝆹𝅥𝅮' + {0x1D1BE, {0x1D1BA, 0x1D165, 0x1D16E}}, // '𝆺𝅥𝅮' + {0x1D1BF, {0x1D1B9, 0x1D165, 0x1D16F}}, // '𝆹𝅥𝅯' + {0x1D1C0, {0x1D1BA, 0x1D165, 0x1D16F}}, // '𝆺𝅥𝅯' + {0x2F800, {0x4E3D}}, // '丽' + {0x2F801, {0x4E38}}, // '丸' + {0x2F802, {0x4E41}}, // '乁' + {0x2F803, {0x20122}}, // '𠄢' + {0x2F804, {0x4F60}}, // '你' + {0x2F805, {0x4FAE}}, // '侮' + {0x2F806, {0x4FBB}}, // '侻' + {0x2F807, {0x5002}}, // '倂' + {0x2F808, {0x507A}}, // '偺' + {0x2F809, {0x5099}}, // '備' + {0x2F80A, {0x50E7}}, // '僧' + {0x2F80B, {0x50CF}}, // '像' + {0x2F80C, {0x349E}}, // '㒞' + {0x2F80D, {0x2063A}}, // '𠘺' + {0x2F80E, {0x514D}}, // '免' + {0x2F80F, {0x5154}}, // '兔' + {0x2F810, {0x5164}}, // '兤' + {0x2F811, {0x5177}}, // '具' + {0x2F812, {0x2051C}}, // '𠔜' + {0x2F813, {0x34B9}}, // '㒹' + {0x2F814, {0x5167}}, // '內' + {0x2F815, {0x518D}}, // '再' + {0x2F816, {0x2054B}}, // '𠕋' + {0x2F817, {0x5197}}, // '冗' + {0x2F818, {0x51A4}}, // '冤' + {0x2F819, {0x4ECC}}, // '仌' + {0x2F81A, {0x51AC}}, // '冬' + {0x2F81B, {0x51B5}}, // '况' + {0x2F81C, {0x291DF}}, // '𩇟' + {0x2F81D, {0x51F5}}, // '凵' + {0x2F81E, {0x5203}}, // '刃' + {0x2F81F, {0x34DF}}, // '㓟' + {0x2F820, {0x523B}}, // '刻' + {0x2F821, {0x5246}}, // '剆' + {0x2F822, {0x5272}}, // '割' + {0x2F823, {0x5277}}, // '剷' + {0x2F824, {0x3515}}, // '㔕' + {0x2F825, {0x52C7}}, // '勇' + {0x2F826, {0x52C9}}, // '勉' + {0x2F827, {0x52E4}}, // '勤' + {0x2F828, {0x52FA}}, // '勺' + {0x2F829, {0x5305}}, // '包' + {0x2F82A, {0x5306}}, // '匆' + {0x2F82B, {0x5317}}, // '北' + {0x2F82C, {0x5349}}, // '卉' + {0x2F82D, {0x5351}}, // '卑' + {0x2F82E, {0x535A}}, // '博' + {0x2F82F, {0x5373}}, // '即' + {0x2F830, {0x537D}}, // '卽' + {0x2F831, {0x537F}}, // '卿' + {0x2F832, {0x537F}}, // '卿' + {0x2F833, {0x537F}}, // '卿' + {0x2F834, {0x20A2C}}, // '𠨬' + {0x2F835, {0x7070}}, // '灰' + {0x2F836, {0x53CA}}, // '及' + {0x2F837, {0x53DF}}, // '叟' + {0x2F838, {0x20B63}}, // '𠭣' + {0x2F839, {0x53EB}}, // '叫' + {0x2F83A, {0x53F1}}, // '叱' + {0x2F83B, {0x5406}}, // '吆' + {0x2F83C, {0x549E}}, // '咞' + {0x2F83D, {0x5438}}, // '吸' + {0x2F83E, {0x5448}}, // '呈' + {0x2F83F, {0x5468}}, // '周' + {0x2F840, {0x54A2}}, // '咢' + {0x2F841, {0x54F6}}, // '哶' + {0x2F842, {0x5510}}, // '唐' + {0x2F843, {0x5553}}, // '啓' + {0x2F844, {0x5563}}, // '啣' + {0x2F845, {0x5584}}, // '善' + {0x2F846, {0x5584}}, // '善' + {0x2F847, {0x5599}}, // '喙' + {0x2F848, {0x55AB}}, // '喫' + {0x2F849, {0x55B3}}, // '喳' + {0x2F84A, {0x55C2}}, // '嗂' + {0x2F84B, {0x5716}}, // '圖' + {0x2F84C, {0x5606}}, // '嘆' + {0x2F84D, {0x5717}}, // '圗' + {0x2F84E, {0x5651}}, // '噑' + {0x2F84F, {0x5674}}, // '噴' + {0x2F850, {0x5207}}, // '切' + {0x2F851, {0x58EE}}, // '壮' + {0x2F852, {0x57CE}}, // '城' + {0x2F853, {0x57F4}}, // '埴' + {0x2F854, {0x580D}}, // '堍' + {0x2F855, {0x578B}}, // '型' + {0x2F856, {0x5832}}, // '堲' + {0x2F857, {0x5831}}, // '報' + {0x2F858, {0x58AC}}, // '墬' + {0x2F859, {0x214E4}}, // '𡓤' + {0x2F85A, {0x58F2}}, // '売' + {0x2F85B, {0x58F7}}, // '壷' + {0x2F85C, {0x5906}}, // '夆' + {0x2F85D, {0x591A}}, // '多' + {0x2F85E, {0x5922}}, // '夢' + {0x2F85F, {0x5962}}, // '奢' + {0x2F860, {0x216A8}}, // '𡚨' + {0x2F861, {0x216EA}}, // '𡛪' + {0x2F862, {0x59EC}}, // '姬' + {0x2F863, {0x5A1B}}, // '娛' + {0x2F864, {0x5A27}}, // '娧' + {0x2F865, {0x59D8}}, // '姘' + {0x2F866, {0x5A66}}, // '婦' + {0x2F867, {0x36EE}}, // '㛮' + {0x2F868, {0x36FC}}, // '㛼' + {0x2F869, {0x5B08}}, // '嬈' + {0x2F86A, {0x5B3E}}, // '嬾' + {0x2F86B, {0x5B3E}}, // '嬾' + {0x2F86C, {0x219C8}}, // '𡧈' + {0x2F86D, {0x5BC3}}, // '寃' + {0x2F86E, {0x5BD8}}, // '寘' + {0x2F86F, {0x5BE7}}, // '寧' + {0x2F870, {0x5BF3}}, // '寳' + {0x2F871, {0x21B18}}, // '𡬘' + {0x2F872, {0x5BFF}}, // '寿' + {0x2F873, {0x5C06}}, // '将' + {0x2F874, {0x5F53}}, // '当' + {0x2F875, {0x5C22}}, // '尢' + {0x2F876, {0x3781}}, // '㞁' + {0x2F877, {0x5C60}}, // '屠' + {0x2F878, {0x5C6E}}, // '屮' + {0x2F879, {0x5CC0}}, // '峀' + {0x2F87A, {0x5C8D}}, // '岍' + {0x2F87B, {0x21DE4}}, // '𡷤' + {0x2F87C, {0x5D43}}, // '嵃' + {0x2F87D, {0x21DE6}}, // '𡷦' + {0x2F87E, {0x5D6E}}, // '嵮' + {0x2F87F, {0x5D6B}}, // '嵫' + {0x2F880, {0x5D7C}}, // '嵼' + {0x2F881, {0x5DE1}}, // '巡' + {0x2F882, {0x5DE2}}, // '巢' + {0x2F883, {0x382F}}, // '㠯' + {0x2F884, {0x5DFD}}, // '巽' + {0x2F885, {0x5E28}}, // '帨' + {0x2F886, {0x5E3D}}, // '帽' + {0x2F887, {0x5E69}}, // '幩' + {0x2F888, {0x3862}}, // '㡢' + {0x2F889, {0x22183}}, // '𢆃' + {0x2F88A, {0x387C}}, // '㡼' + {0x2F88B, {0x5EB0}}, // '庰' + {0x2F88C, {0x5EB3}}, // '庳' + {0x2F88D, {0x5EB6}}, // '庶' + {0x2F88E, {0x5ECA}}, // '廊' + {0x2F88F, {0x2A392}}, // '𪎒' + {0x2F890, {0x5EFE}}, // '廾' + {0x2F891, {0x22331}}, // '𢌱' + {0x2F892, {0x22331}}, // '𢌱' + {0x2F893, {0x8201}}, // '舁' + {0x2F894, {0x5F22}}, // '弢' + {0x2F895, {0x5F22}}, // '弢' + {0x2F896, {0x38C7}}, // '㣇' + {0x2F897, {0x232B8}}, // '𣊸' + {0x2F898, {0x261DA}}, // '𦇚' + {0x2F899, {0x5F62}}, // '形' + {0x2F89A, {0x5F6B}}, // '彫' + {0x2F89B, {0x38E3}}, // '㣣' + {0x2F89C, {0x5F9A}}, // '徚' + {0x2F89D, {0x5FCD}}, // '忍' + {0x2F89E, {0x5FD7}}, // '志' + {0x2F89F, {0x5FF9}}, // '忹' + {0x2F8A0, {0x6081}}, // '悁' + {0x2F8A1, {0x393A}}, // '㤺' + {0x2F8A2, {0x391C}}, // '㤜' + {0x2F8A3, {0x6094}}, // '悔' + {0x2F8A4, {0x226D4}}, // '𢛔' + {0x2F8A5, {0x60C7}}, // '惇' + {0x2F8A6, {0x6148}}, // '慈' + {0x2F8A7, {0x614C}}, // '慌' + {0x2F8A8, {0x614E}}, // '慎' + {0x2F8A9, {0x614C}}, // '慌' + {0x2F8AA, {0x617A}}, // '慺' + {0x2F8AB, {0x618E}}, // '憎' + {0x2F8AC, {0x61B2}}, // '憲' + {0x2F8AD, {0x61A4}}, // '憤' + {0x2F8AE, {0x61AF}}, // '憯' + {0x2F8AF, {0x61DE}}, // '懞' + {0x2F8B0, {0x61F2}}, // '懲' + {0x2F8B1, {0x61F6}}, // '懶' + {0x2F8B2, {0x6210}}, // '成' + {0x2F8B3, {0x621B}}, // '戛' + {0x2F8B4, {0x625D}}, // '扝' + {0x2F8B5, {0x62B1}}, // '抱' + {0x2F8B6, {0x62D4}}, // '拔' + {0x2F8B7, {0x6350}}, // '捐' + {0x2F8B8, {0x22B0C}}, // '𢬌' + {0x2F8B9, {0x633D}}, // '挽' + {0x2F8BA, {0x62FC}}, // '拼' + {0x2F8BB, {0x6368}}, // '捨' + {0x2F8BC, {0x6383}}, // '掃' + {0x2F8BD, {0x63E4}}, // '揤' + {0x2F8BE, {0x22BF1}}, // '𢯱' + {0x2F8BF, {0x6422}}, // '搢' + {0x2F8C0, {0x63C5}}, // '揅' + {0x2F8C1, {0x63A9}}, // '掩' + {0x2F8C2, {0x3A2E}}, // '㨮' + {0x2F8C3, {0x6469}}, // '摩' + {0x2F8C4, {0x647E}}, // '摾' + {0x2F8C5, {0x649D}}, // '撝' + {0x2F8C6, {0x6477}}, // '摷' + {0x2F8C7, {0x3A6C}}, // '㩬' + {0x2F8C8, {0x654F}}, // '敏' + {0x2F8C9, {0x656C}}, // '敬' + {0x2F8CA, {0x2300A}}, // '𣀊' + {0x2F8CB, {0x65E3}}, // '旣' + {0x2F8CC, {0x66F8}}, // '書' + {0x2F8CD, {0x6649}}, // '晉' + {0x2F8CE, {0x3B19}}, // '㬙' + {0x2F8CF, {0x6691}}, // '暑' + {0x2F8D0, {0x3B08}}, // '㬈' + {0x2F8D1, {0x3AE4}}, // '㫤' + {0x2F8D2, {0x5192}}, // '冒' + {0x2F8D3, {0x5195}}, // '冕' + {0x2F8D4, {0x6700}}, // '最' + {0x2F8D5, {0x669C}}, // '暜' + {0x2F8D6, {0x80AD}}, // '肭' + {0x2F8D7, {0x43D9}}, // '䏙' + {0x2F8D8, {0x6717}}, // '朗' + {0x2F8D9, {0x671B}}, // '望' + {0x2F8DA, {0x6721}}, // '朡' + {0x2F8DB, {0x675E}}, // '杞' + {0x2F8DC, {0x6753}}, // '杓' + {0x2F8DD, {0x233C3}}, // '𣏃' + {0x2F8DE, {0x3B49}}, // '㭉' + {0x2F8DF, {0x67FA}}, // '柺' + {0x2F8E0, {0x6785}}, // '枅' + {0x2F8E1, {0x6852}}, // '桒' + {0x2F8E2, {0x6885}}, // '梅' + {0x2F8E3, {0x2346D}}, // '𣑭' + {0x2F8E4, {0x688E}}, // '梎' + {0x2F8E5, {0x681F}}, // '栟' + {0x2F8E6, {0x6914}}, // '椔' + {0x2F8E7, {0x3B9D}}, // '㮝' + {0x2F8E8, {0x6942}}, // '楂' + {0x2F8E9, {0x69A3}}, // '榣' + {0x2F8EA, {0x69EA}}, // '槪' + {0x2F8EB, {0x6AA8}}, // '檨' + {0x2F8EC, {0x236A3}}, // '𣚣' + {0x2F8ED, {0x6ADB}}, // '櫛' + {0x2F8EE, {0x3C18}}, // '㰘' + {0x2F8EF, {0x6B21}}, // '次' + {0x2F8F0, {0x238A7}}, // '𣢧' + {0x2F8F1, {0x6B54}}, // '歔' + {0x2F8F2, {0x3C4E}}, // '㱎' + {0x2F8F3, {0x6B72}}, // '歲' + {0x2F8F4, {0x6B9F}}, // '殟' + {0x2F8F5, {0x6BBA}}, // '殺' + {0x2F8F6, {0x6BBB}}, // '殻' + {0x2F8F7, {0x23A8D}}, // '𣪍' + {0x2F8F8, {0x21D0B}}, // '𡴋' + {0x2F8F9, {0x23AFA}}, // '𣫺' + {0x2F8FA, {0x6C4E}}, // '汎' + {0x2F8FB, {0x23CBC}}, // '𣲼' + {0x2F8FC, {0x6CBF}}, // '沿' + {0x2F8FD, {0x6CCD}}, // '泍' + {0x2F8FE, {0x6C67}}, // '汧' + {0x2F8FF, {0x6D16}}, // '洖' + {0x2F900, {0x6D3E}}, // '派' + {0x2F901, {0x6D77}}, // '海' + {0x2F902, {0x6D41}}, // '流' + {0x2F903, {0x6D69}}, // '浩' + {0x2F904, {0x6D78}}, // '浸' + {0x2F905, {0x6D85}}, // '涅' + {0x2F906, {0x23D1E}}, // '𣴞' + {0x2F907, {0x6D34}}, // '洴' + {0x2F908, {0x6E2F}}, // '港' + {0x2F909, {0x6E6E}}, // '湮' + {0x2F90A, {0x3D33}}, // '㴳' + {0x2F90B, {0x6ECB}}, // '滋' + {0x2F90C, {0x6EC7}}, // '滇' + {0x2F90D, {0x23ED1}}, // '𣻑' + {0x2F90E, {0x6DF9}}, // '淹' + {0x2F90F, {0x6F6E}}, // '潮' + {0x2F910, {0x23F5E}}, // '𣽞' + {0x2F911, {0x23F8E}}, // '𣾎' + {0x2F912, {0x6FC6}}, // '濆' + {0x2F913, {0x7039}}, // '瀹' + {0x2F914, {0x701E}}, // '瀞' + {0x2F915, {0x701B}}, // '瀛' + {0x2F916, {0x3D96}}, // '㶖' + {0x2F917, {0x704A}}, // '灊' + {0x2F918, {0x707D}}, // '災' + {0x2F919, {0x7077}}, // '灷' + {0x2F91A, {0x70AD}}, // '炭' + {0x2F91B, {0x20525}}, // '𠔥' + {0x2F91C, {0x7145}}, // '煅' + {0x2F91D, {0x24263}}, // '𤉣' + {0x2F91E, {0x719C}}, // '熜' + {0x2F91F, {0x243AB}}, // '𤎫' + {0x2F920, {0x7228}}, // '爨' + {0x2F921, {0x7235}}, // '爵' + {0x2F922, {0x7250}}, // '牐' + {0x2F923, {0x24608}}, // '𤘈' + {0x2F924, {0x7280}}, // '犀' + {0x2F925, {0x7295}}, // '犕' + {0x2F926, {0x24735}}, // '𤜵' + {0x2F927, {0x24814}}, // '𤠔' + {0x2F928, {0x737A}}, // '獺' + {0x2F929, {0x738B}}, // '王' + {0x2F92A, {0x3EAC}}, // '㺬' + {0x2F92B, {0x73A5}}, // '玥' + {0x2F92C, {0x3EB8}}, // '㺸' + {0x2F92D, {0x3EB8}}, // '㺸' + {0x2F92E, {0x7447}}, // '瑇' + {0x2F92F, {0x745C}}, // '瑜' + {0x2F930, {0x7471}}, // '瑱' + {0x2F931, {0x7485}}, // '璅' + {0x2F932, {0x74CA}}, // '瓊' + {0x2F933, {0x3F1B}}, // '㼛' + {0x2F934, {0x7524}}, // '甤' + {0x2F935, {0x24C36}}, // '𤰶' + {0x2F936, {0x753E}}, // '甾' + {0x2F937, {0x24C92}}, // '𤲒' + {0x2F938, {0x7570}}, // '異' + {0x2F939, {0x2219F}}, // '𢆟' + {0x2F93A, {0x7610}}, // '瘐' + {0x2F93B, {0x24FA1}}, // '𤾡' + {0x2F93C, {0x24FB8}}, // '𤾸' + {0x2F93D, {0x25044}}, // '𥁄' + {0x2F93E, {0x3FFC}}, // '㿼' + {0x2F93F, {0x4008}}, // '䀈' + {0x2F940, {0x76F4}}, // '直' + {0x2F941, {0x250F3}}, // '𥃳' + {0x2F942, {0x250F2}}, // '𥃲' + {0x2F943, {0x25119}}, // '𥄙' + {0x2F944, {0x25133}}, // '𥄳' + {0x2F945, {0x771E}}, // '眞' + {0x2F946, {0x771F}}, // '真' + {0x2F947, {0x771F}}, // '真' + {0x2F948, {0x774A}}, // '睊' + {0x2F949, {0x4039}}, // '䀹' + {0x2F94A, {0x778B}}, // '瞋' + {0x2F94B, {0x4046}}, // '䁆' + {0x2F94C, {0x4096}}, // '䂖' + {0x2F94D, {0x2541D}}, // '𥐝' + {0x2F94E, {0x784E}}, // '硎' + {0x2F94F, {0x788C}}, // '碌' + {0x2F950, {0x78CC}}, // '磌' + {0x2F951, {0x40E3}}, // '䃣' + {0x2F952, {0x25626}}, // '𥘦' + {0x2F953, {0x7956}}, // '祖' + {0x2F954, {0x2569A}}, // '𥚚' + {0x2F955, {0x256C5}}, // '𥛅' + {0x2F956, {0x798F}}, // '福' + {0x2F957, {0x79EB}}, // '秫' + {0x2F958, {0x412F}}, // '䄯' + {0x2F959, {0x7A40}}, // '穀' + {0x2F95A, {0x7A4A}}, // '穊' + {0x2F95B, {0x7A4F}}, // '穏' + {0x2F95C, {0x2597C}}, // '𥥼' + {0x2F95D, {0x25AA7}}, // '𥪧' + {0x2F95E, {0x25AA7}}, // '𥪧' + {0x2F95F, {0x7AEE}}, // '竮' + {0x2F960, {0x4202}}, // '䈂' + {0x2F961, {0x25BAB}}, // '𥮫' + {0x2F962, {0x7BC6}}, // '篆' + {0x2F963, {0x7BC9}}, // '築' + {0x2F964, {0x4227}}, // '䈧' + {0x2F965, {0x25C80}}, // '𥲀' + {0x2F966, {0x7CD2}}, // '糒' + {0x2F967, {0x42A0}}, // '䊠' + {0x2F968, {0x7CE8}}, // '糨' + {0x2F969, {0x7CE3}}, // '糣' + {0x2F96A, {0x7D00}}, // '紀' + {0x2F96B, {0x25F86}}, // '𥾆' + {0x2F96C, {0x7D63}}, // '絣' + {0x2F96D, {0x4301}}, // '䌁' + {0x2F96E, {0x7DC7}}, // '緇' + {0x2F96F, {0x7E02}}, // '縂' + {0x2F970, {0x7E45}}, // '繅' + {0x2F971, {0x4334}}, // '䌴' + {0x2F972, {0x26228}}, // '𦈨' + {0x2F973, {0x26247}}, // '𦉇' + {0x2F974, {0x4359}}, // '䍙' + {0x2F975, {0x262D9}}, // '𦋙' + {0x2F976, {0x7F7A}}, // '罺' + {0x2F977, {0x2633E}}, // '𦌾' + {0x2F978, {0x7F95}}, // '羕' + {0x2F979, {0x7FFA}}, // '翺' + {0x2F97A, {0x8005}}, // '者' + {0x2F97B, {0x264DA}}, // '𦓚' + {0x2F97C, {0x26523}}, // '𦔣' + {0x2F97D, {0x8060}}, // '聠' + {0x2F97E, {0x265A8}}, // '𦖨' + {0x2F97F, {0x8070}}, // '聰' + {0x2F980, {0x2335F}}, // '𣍟' + {0x2F981, {0x43D5}}, // '䏕' + {0x2F982, {0x80B2}}, // '育' + {0x2F983, {0x8103}}, // '脃' + {0x2F984, {0x440B}}, // '䐋' + {0x2F985, {0x813E}}, // '脾' + {0x2F986, {0x5AB5}}, // '媵' + {0x2F987, {0x267A7}}, // '𦞧' + {0x2F988, {0x267B5}}, // '𦞵' + {0x2F989, {0x23393}}, // '𣎓' + {0x2F98A, {0x2339C}}, // '𣎜' + {0x2F98B, {0x8201}}, // '舁' + {0x2F98C, {0x8204}}, // '舄' + {0x2F98D, {0x8F9E}}, // '辞' + {0x2F98E, {0x446B}}, // '䑫' + {0x2F98F, {0x8291}}, // '芑' + {0x2F990, {0x828B}}, // '芋' + {0x2F991, {0x829D}}, // '芝' + {0x2F992, {0x52B3}}, // '劳' + {0x2F993, {0x82B1}}, // '花' + {0x2F994, {0x82B3}}, // '芳' + {0x2F995, {0x82BD}}, // '芽' + {0x2F996, {0x82E6}}, // '苦' + {0x2F997, {0x26B3C}}, // '𦬼' + {0x2F998, {0x82E5}}, // '若' + {0x2F999, {0x831D}}, // '茝' + {0x2F99A, {0x8363}}, // '荣' + {0x2F99B, {0x83AD}}, // '莭' + {0x2F99C, {0x8323}}, // '茣' + {0x2F99D, {0x83BD}}, // '莽' + {0x2F99E, {0x83E7}}, // '菧' + {0x2F99F, {0x8457}}, // '著' + {0x2F9A0, {0x8353}}, // '荓' + {0x2F9A1, {0x83CA}}, // '菊' + {0x2F9A2, {0x83CC}}, // '菌' + {0x2F9A3, {0x83DC}}, // '菜' + {0x2F9A4, {0x26C36}}, // '𦰶' + {0x2F9A5, {0x26D6B}}, // '𦵫' + {0x2F9A6, {0x26CD5}}, // '𦳕' + {0x2F9A7, {0x452B}}, // '䔫' + {0x2F9A8, {0x84F1}}, // '蓱' + {0x2F9A9, {0x84F3}}, // '蓳' + {0x2F9AA, {0x8516}}, // '蔖' + {0x2F9AB, {0x273CA}}, // '𧏊' + {0x2F9AC, {0x8564}}, // '蕤' + {0x2F9AD, {0x26F2C}}, // '𦼬' + {0x2F9AE, {0x455D}}, // '䕝' + {0x2F9AF, {0x4561}}, // '䕡' + {0x2F9B0, {0x26FB1}}, // '𦾱' + {0x2F9B1, {0x270D2}}, // '𧃒' + {0x2F9B2, {0x456B}}, // '䕫' + {0x2F9B3, {0x8650}}, // '虐' + {0x2F9B4, {0x865C}}, // '虜' + {0x2F9B5, {0x8667}}, // '虧' + {0x2F9B6, {0x8669}}, // '虩' + {0x2F9B7, {0x86A9}}, // '蚩' + {0x2F9B8, {0x8688}}, // '蚈' + {0x2F9B9, {0x870E}}, // '蜎' + {0x2F9BA, {0x86E2}}, // '蛢' + {0x2F9BB, {0x8779}}, // '蝹' + {0x2F9BC, {0x8728}}, // '蜨' + {0x2F9BD, {0x876B}}, // '蝫' + {0x2F9BE, {0x8786}}, // '螆' + {0x2F9BF, {0x45D7}}, // '䗗' + {0x2F9C0, {0x87E1}}, // '蟡' + {0x2F9C1, {0x8801}}, // '蠁' + {0x2F9C2, {0x45F9}}, // '䗹' + {0x2F9C3, {0x8860}}, // '衠' + {0x2F9C4, {0x8863}}, // '衣' + {0x2F9C5, {0x27667}}, // '𧙧' + {0x2F9C6, {0x88D7}}, // '裗' + {0x2F9C7, {0x88DE}}, // '裞' + {0x2F9C8, {0x4635}}, // '䘵' + {0x2F9C9, {0x88FA}}, // '裺' + {0x2F9CA, {0x34BB}}, // '㒻' + {0x2F9CB, {0x278AE}}, // '𧢮' + {0x2F9CC, {0x27966}}, // '𧥦' + {0x2F9CD, {0x46BE}}, // '䚾' + {0x2F9CE, {0x46C7}}, // '䛇' + {0x2F9CF, {0x8AA0}}, // '誠' + {0x2F9D0, {0x8AED}}, // '諭' + {0x2F9D1, {0x8B8A}}, // '變' + {0x2F9D2, {0x8C55}}, // '豕' + {0x2F9D3, {0x27CA8}}, // '𧲨' + {0x2F9D4, {0x8CAB}}, // '貫' + {0x2F9D5, {0x8CC1}}, // '賁' + {0x2F9D6, {0x8D1B}}, // '贛' + {0x2F9D7, {0x8D77}}, // '起' + {0x2F9D8, {0x27F2F}}, // '𧼯' + {0x2F9D9, {0x20804}}, // '𠠄' + {0x2F9DA, {0x8DCB}}, // '跋' + {0x2F9DB, {0x8DBC}}, // '趼' + {0x2F9DC, {0x8DF0}}, // '跰' + {0x2F9DD, {0x208DE}}, // '𠣞' + {0x2F9DE, {0x8ED4}}, // '軔' + {0x2F9DF, {0x8F38}}, // '輸' + {0x2F9E0, {0x285D2}}, // '𨗒' + {0x2F9E1, {0x285ED}}, // '𨗭' + {0x2F9E2, {0x9094}}, // '邔' + {0x2F9E3, {0x90F1}}, // '郱' + {0x2F9E4, {0x9111}}, // '鄑' + {0x2F9E5, {0x2872E}}, // '𨜮' + {0x2F9E6, {0x911B}}, // '鄛' + {0x2F9E7, {0x9238}}, // '鈸' + {0x2F9E8, {0x92D7}}, // '鋗' + {0x2F9E9, {0x92D8}}, // '鋘' + {0x2F9EA, {0x927C}}, // '鉼' + {0x2F9EB, {0x93F9}}, // '鏹' + {0x2F9EC, {0x9415}}, // '鐕' + {0x2F9ED, {0x28BFA}}, // '𨯺' + {0x2F9EE, {0x958B}}, // '開' + {0x2F9EF, {0x4995}}, // '䦕' + {0x2F9F0, {0x95B7}}, // '閷' + {0x2F9F1, {0x28D77}}, // '𨵷' + {0x2F9F2, {0x49E6}}, // '䧦' + {0x2F9F3, {0x96C3}}, // '雃' + {0x2F9F4, {0x5DB2}}, // '嶲' + {0x2F9F5, {0x9723}}, // '霣' + {0x2F9F6, {0x29145}}, // '𩅅' + {0x2F9F7, {0x2921A}}, // '𩈚' + {0x2F9F8, {0x4A6E}}, // '䩮' + {0x2F9F9, {0x4A76}}, // '䩶' + {0x2F9FA, {0x97E0}}, // '韠' + {0x2F9FB, {0x2940A}}, // '𩐊' + {0x2F9FC, {0x4AB2}}, // '䪲' + {0x2F9FD, {0x29496}}, // '𩒖' + {0x2F9FE, {0x980B}}, // '頋' + {0x2F9FF, {0x980B}}, // '頋' + {0x2FA00, {0x9829}}, // '頩' + {0x2FA01, {0x295B6}}, // '𩖶' + {0x2FA02, {0x98E2}}, // '飢' + {0x2FA03, {0x4B33}}, // '䬳' + {0x2FA04, {0x9929}}, // '餩' + {0x2FA05, {0x99A7}}, // '馧' + {0x2FA06, {0x99C2}}, // '駂' + {0x2FA07, {0x99FE}}, // '駾' + {0x2FA08, {0x4BCE}}, // '䯎' + {0x2FA09, {0x29B30}}, // '𩬰' + {0x2FA0A, {0x9B12}}, // '鬒' + {0x2FA0B, {0x9C40}}, // '鱀' + {0x2FA0C, {0x9CFD}}, // '鳽' + {0x2FA0D, {0x4CCE}}, // '䳎' + {0x2FA0E, {0x4CED}}, // '䳭' + {0x2FA0F, {0x9D67}}, // '鵧' + {0x2FA10, {0x2A0CE}}, // '𪃎' + {0x2FA11, {0x4CF8}}, // '䳸' + {0x2FA12, {0x2A105}}, // '𪄅' + {0x2FA13, {0x2A20E}}, // '𪈎' + {0x2FA14, {0x2A291}}, // '𪊑' + {0x2FA15, {0x9EBB}}, // '麻' + {0x2FA16, {0x4D56}}, // '䵖' + {0x2FA17, {0x9EF9}}, // '黹' + {0x2FA18, {0x9EFE}}, // '黾' + {0x2FA19, {0x9F05}}, // '鼅' + {0x2FA1A, {0x9F0F}}, // '鼏' + {0x2FA1B, {0x9F16}}, // '鼖' + {0x2FA1C, {0x9F3B}}, // '鼻' + {0x2FA1D, {0x2A600}}, // '𪘀' }; // Hash function for pair struct pair_hash { - std::size_t operator()(const std::pair &p) const { - return std::hash()((uint64_t(p.first) << 32) | p.second); - } + std::size_t operator()(const std::pair& p) const { + return std::hash()((uint64_t(p.first) << 32) | p.second); + } }; // Canonical composition table: (base, combining_mark) -> composed // Reverse mapping of decomposition for NFC normalization -static const std::unordered_map, uint32_t, - pair_hash> - nfc_composition_table = { - {{0x003C, 0x0338}, 0x226E}, // '<' + combining -> '≮' - {{0x003D, 0x0338}, 0x2260}, // '=' + combining -> '≠' - {{0x003E, 0x0338}, 0x226F}, // '>' + combining -> '≯' - {{0x0041, 0x0300}, 0x00C0}, // 'A' + combining -> 'À' - {{0x0041, 0x0301}, 0x00C1}, // 'A' + combining -> 'Á' - {{0x0041, 0x0302}, 0x00C2}, // 'A' + combining -> 'Â' - {{0x0041, 0x0303}, 0x00C3}, // 'A' + combining -> 'Ã' - {{0x0041, 0x0304}, 0x0100}, // 'A' + combining -> 'Ā' - {{0x0041, 0x0306}, 0x0102}, // 'A' + combining -> 'Ă' - {{0x0041, 0x0307}, 0x0226}, // 'A' + combining -> 'Ȧ' - {{0x0041, 0x0308}, 0x00C4}, // 'A' + combining -> 'Ä' - {{0x0041, 0x0309}, 0x1EA2}, // 'A' + combining -> 'Ả' - {{0x0041, 0x030A}, 0x00C5}, // 'A' + combining -> 'Å' - {{0x0041, 0x030C}, 0x01CD}, // 'A' + combining -> 'Ǎ' - {{0x0041, 0x030F}, 0x0200}, // 'A' + combining -> 'Ȁ' - {{0x0041, 0x0311}, 0x0202}, // 'A' + combining -> 'Ȃ' - {{0x0041, 0x0323}, 0x1EA0}, // 'A' + combining -> 'Ạ' - {{0x0041, 0x0325}, 0x1E00}, // 'A' + combining -> 'Ḁ' - {{0x0041, 0x0328}, 0x0104}, // 'A' + combining -> 'Ą' - {{0x0042, 0x0307}, 0x1E02}, // 'B' + combining -> 'Ḃ' - {{0x0042, 0x0323}, 0x1E04}, // 'B' + combining -> 'Ḅ' - {{0x0042, 0x0331}, 0x1E06}, // 'B' + combining -> 'Ḇ' - {{0x0043, 0x0301}, 0x0106}, // 'C' + combining -> 'Ć' - {{0x0043, 0x0302}, 0x0108}, // 'C' + combining -> 'Ĉ' - {{0x0043, 0x0307}, 0x010A}, // 'C' + combining -> 'Ċ' - {{0x0043, 0x030C}, 0x010C}, // 'C' + combining -> 'Č' - {{0x0043, 0x0327}, 0x00C7}, // 'C' + combining -> 'Ç' - {{0x0044, 0x0307}, 0x1E0A}, // 'D' + combining -> 'Ḋ' - {{0x0044, 0x030C}, 0x010E}, // 'D' + combining -> 'Ď' - {{0x0044, 0x0323}, 0x1E0C}, // 'D' + combining -> 'Ḍ' - {{0x0044, 0x0327}, 0x1E10}, // 'D' + combining -> 'Ḑ' - {{0x0044, 0x032D}, 0x1E12}, // 'D' + combining -> 'Ḓ' - {{0x0044, 0x0331}, 0x1E0E}, // 'D' + combining -> 'Ḏ' - {{0x0045, 0x0300}, 0x00C8}, // 'E' + combining -> 'È' - {{0x0045, 0x0301}, 0x00C9}, // 'E' + combining -> 'É' - {{0x0045, 0x0302}, 0x00CA}, // 'E' + combining -> 'Ê' - {{0x0045, 0x0303}, 0x1EBC}, // 'E' + combining -> 'Ẽ' - {{0x0045, 0x0304}, 0x0112}, // 'E' + combining -> 'Ē' - {{0x0045, 0x0306}, 0x0114}, // 'E' + combining -> 'Ĕ' - {{0x0045, 0x0307}, 0x0116}, // 'E' + combining -> 'Ė' - {{0x0045, 0x0308}, 0x00CB}, // 'E' + combining -> 'Ë' - {{0x0045, 0x0309}, 0x1EBA}, // 'E' + combining -> 'Ẻ' - {{0x0045, 0x030C}, 0x011A}, // 'E' + combining -> 'Ě' - {{0x0045, 0x030F}, 0x0204}, // 'E' + combining -> 'Ȅ' - {{0x0045, 0x0311}, 0x0206}, // 'E' + combining -> 'Ȇ' - {{0x0045, 0x0323}, 0x1EB8}, // 'E' + combining -> 'Ẹ' - {{0x0045, 0x0327}, 0x0228}, // 'E' + combining -> 'Ȩ' - {{0x0045, 0x0328}, 0x0118}, // 'E' + combining -> 'Ę' - {{0x0045, 0x032D}, 0x1E18}, // 'E' + combining -> 'Ḙ' - {{0x0045, 0x0330}, 0x1E1A}, // 'E' + combining -> 'Ḛ' - {{0x0046, 0x0307}, 0x1E1E}, // 'F' + combining -> 'Ḟ' - {{0x0047, 0x0301}, 0x01F4}, // 'G' + combining -> 'Ǵ' - {{0x0047, 0x0302}, 0x011C}, // 'G' + combining -> 'Ĝ' - {{0x0047, 0x0304}, 0x1E20}, // 'G' + combining -> 'Ḡ' - {{0x0047, 0x0306}, 0x011E}, // 'G' + combining -> 'Ğ' - {{0x0047, 0x0307}, 0x0120}, // 'G' + combining -> 'Ġ' - {{0x0047, 0x030C}, 0x01E6}, // 'G' + combining -> 'Ǧ' - {{0x0047, 0x0327}, 0x0122}, // 'G' + combining -> 'Ģ' - {{0x0048, 0x0302}, 0x0124}, // 'H' + combining -> 'Ĥ' - {{0x0048, 0x0307}, 0x1E22}, // 'H' + combining -> 'Ḣ' - {{0x0048, 0x0308}, 0x1E26}, // 'H' + combining -> 'Ḧ' - {{0x0048, 0x030C}, 0x021E}, // 'H' + combining -> 'Ȟ' - {{0x0048, 0x0323}, 0x1E24}, // 'H' + combining -> 'Ḥ' - {{0x0048, 0x0327}, 0x1E28}, // 'H' + combining -> 'Ḩ' - {{0x0048, 0x032E}, 0x1E2A}, // 'H' + combining -> 'Ḫ' - {{0x0049, 0x0300}, 0x00CC}, // 'I' + combining -> 'Ì' - {{0x0049, 0x0301}, 0x00CD}, // 'I' + combining -> 'Í' - {{0x0049, 0x0302}, 0x00CE}, // 'I' + combining -> 'Î' - {{0x0049, 0x0303}, 0x0128}, // 'I' + combining -> 'Ĩ' - {{0x0049, 0x0304}, 0x012A}, // 'I' + combining -> 'Ī' - {{0x0049, 0x0306}, 0x012C}, // 'I' + combining -> 'Ĭ' - {{0x0049, 0x0307}, 0x0130}, // 'I' + combining -> 'İ' - {{0x0049, 0x0308}, 0x00CF}, // 'I' + combining -> 'Ï' - {{0x0049, 0x0309}, 0x1EC8}, // 'I' + combining -> 'Ỉ' - {{0x0049, 0x030C}, 0x01CF}, // 'I' + combining -> 'Ǐ' - {{0x0049, 0x030F}, 0x0208}, // 'I' + combining -> 'Ȉ' - {{0x0049, 0x0311}, 0x020A}, // 'I' + combining -> 'Ȋ' - {{0x0049, 0x0323}, 0x1ECA}, // 'I' + combining -> 'Ị' - {{0x0049, 0x0328}, 0x012E}, // 'I' + combining -> 'Į' - {{0x0049, 0x0330}, 0x1E2C}, // 'I' + combining -> 'Ḭ' - {{0x004A, 0x0302}, 0x0134}, // 'J' + combining -> 'Ĵ' - {{0x004B, 0x0301}, 0x1E30}, // 'K' + combining -> 'Ḱ' - {{0x004B, 0x030C}, 0x01E8}, // 'K' + combining -> 'Ǩ' - {{0x004B, 0x0323}, 0x1E32}, // 'K' + combining -> 'Ḳ' - {{0x004B, 0x0327}, 0x0136}, // 'K' + combining -> 'Ķ' - {{0x004B, 0x0331}, 0x1E34}, // 'K' + combining -> 'Ḵ' - {{0x004C, 0x0301}, 0x0139}, // 'L' + combining -> 'Ĺ' - {{0x004C, 0x030C}, 0x013D}, // 'L' + combining -> 'Ľ' - {{0x004C, 0x0323}, 0x1E36}, // 'L' + combining -> 'Ḷ' - {{0x004C, 0x0327}, 0x013B}, // 'L' + combining -> 'Ļ' - {{0x004C, 0x032D}, 0x1E3C}, // 'L' + combining -> 'Ḽ' - {{0x004C, 0x0331}, 0x1E3A}, // 'L' + combining -> 'Ḻ' - {{0x004D, 0x0301}, 0x1E3E}, // 'M' + combining -> 'Ḿ' - {{0x004D, 0x0307}, 0x1E40}, // 'M' + combining -> 'Ṁ' - {{0x004D, 0x0323}, 0x1E42}, // 'M' + combining -> 'Ṃ' - {{0x004E, 0x0300}, 0x01F8}, // 'N' + combining -> 'Ǹ' - {{0x004E, 0x0301}, 0x0143}, // 'N' + combining -> 'Ń' - {{0x004E, 0x0303}, 0x00D1}, // 'N' + combining -> 'Ñ' - {{0x004E, 0x0307}, 0x1E44}, // 'N' + combining -> 'Ṅ' - {{0x004E, 0x030C}, 0x0147}, // 'N' + combining -> 'Ň' - {{0x004E, 0x0323}, 0x1E46}, // 'N' + combining -> 'Ṇ' - {{0x004E, 0x0327}, 0x0145}, // 'N' + combining -> 'Ņ' - {{0x004E, 0x032D}, 0x1E4A}, // 'N' + combining -> 'Ṋ' - {{0x004E, 0x0331}, 0x1E48}, // 'N' + combining -> 'Ṉ' - {{0x004F, 0x0300}, 0x00D2}, // 'O' + combining -> 'Ò' - {{0x004F, 0x0301}, 0x00D3}, // 'O' + combining -> 'Ó' - {{0x004F, 0x0302}, 0x00D4}, // 'O' + combining -> 'Ô' - {{0x004F, 0x0303}, 0x00D5}, // 'O' + combining -> 'Õ' - {{0x004F, 0x0304}, 0x014C}, // 'O' + combining -> 'Ō' - {{0x004F, 0x0306}, 0x014E}, // 'O' + combining -> 'Ŏ' - {{0x004F, 0x0307}, 0x022E}, // 'O' + combining -> 'Ȯ' - {{0x004F, 0x0308}, 0x00D6}, // 'O' + combining -> 'Ö' - {{0x004F, 0x0309}, 0x1ECE}, // 'O' + combining -> 'Ỏ' - {{0x004F, 0x030B}, 0x0150}, // 'O' + combining -> 'Ő' - {{0x004F, 0x030C}, 0x01D1}, // 'O' + combining -> 'Ǒ' - {{0x004F, 0x030F}, 0x020C}, // 'O' + combining -> 'Ȍ' - {{0x004F, 0x0311}, 0x020E}, // 'O' + combining -> 'Ȏ' - {{0x004F, 0x031B}, 0x01A0}, // 'O' + combining -> 'Ơ' - {{0x004F, 0x0323}, 0x1ECC}, // 'O' + combining -> 'Ọ' - {{0x004F, 0x0328}, 0x01EA}, // 'O' + combining -> 'Ǫ' - {{0x0050, 0x0301}, 0x1E54}, // 'P' + combining -> 'Ṕ' - {{0x0050, 0x0307}, 0x1E56}, // 'P' + combining -> 'Ṗ' - {{0x0052, 0x0301}, 0x0154}, // 'R' + combining -> 'Ŕ' - {{0x0052, 0x0307}, 0x1E58}, // 'R' + combining -> 'Ṙ' - {{0x0052, 0x030C}, 0x0158}, // 'R' + combining -> 'Ř' - {{0x0052, 0x030F}, 0x0210}, // 'R' + combining -> 'Ȑ' - {{0x0052, 0x0311}, 0x0212}, // 'R' + combining -> 'Ȓ' - {{0x0052, 0x0323}, 0x1E5A}, // 'R' + combining -> 'Ṛ' - {{0x0052, 0x0327}, 0x0156}, // 'R' + combining -> 'Ŗ' - {{0x0052, 0x0331}, 0x1E5E}, // 'R' + combining -> 'Ṟ' - {{0x0053, 0x0301}, 0x015A}, // 'S' + combining -> 'Ś' - {{0x0053, 0x0302}, 0x015C}, // 'S' + combining -> 'Ŝ' - {{0x0053, 0x0307}, 0x1E60}, // 'S' + combining -> 'Ṡ' - {{0x0053, 0x030C}, 0x0160}, // 'S' + combining -> 'Š' - {{0x0053, 0x0323}, 0x1E62}, // 'S' + combining -> 'Ṣ' - {{0x0053, 0x0326}, 0x0218}, // 'S' + combining -> 'Ș' - {{0x0053, 0x0327}, 0x015E}, // 'S' + combining -> 'Ş' - {{0x0054, 0x0307}, 0x1E6A}, // 'T' + combining -> 'Ṫ' - {{0x0054, 0x030C}, 0x0164}, // 'T' + combining -> 'Ť' - {{0x0054, 0x0323}, 0x1E6C}, // 'T' + combining -> 'Ṭ' - {{0x0054, 0x0326}, 0x021A}, // 'T' + combining -> 'Ț' - {{0x0054, 0x0327}, 0x0162}, // 'T' + combining -> 'Ţ' - {{0x0054, 0x032D}, 0x1E70}, // 'T' + combining -> 'Ṱ' - {{0x0054, 0x0331}, 0x1E6E}, // 'T' + combining -> 'Ṯ' - {{0x0055, 0x0300}, 0x00D9}, // 'U' + combining -> 'Ù' - {{0x0055, 0x0301}, 0x00DA}, // 'U' + combining -> 'Ú' - {{0x0055, 0x0302}, 0x00DB}, // 'U' + combining -> 'Û' - {{0x0055, 0x0303}, 0x0168}, // 'U' + combining -> 'Ũ' - {{0x0055, 0x0304}, 0x016A}, // 'U' + combining -> 'Ū' - {{0x0055, 0x0306}, 0x016C}, // 'U' + combining -> 'Ŭ' - {{0x0055, 0x0308}, 0x00DC}, // 'U' + combining -> 'Ü' - {{0x0055, 0x0309}, 0x1EE6}, // 'U' + combining -> 'Ủ' - {{0x0055, 0x030A}, 0x016E}, // 'U' + combining -> 'Ů' - {{0x0055, 0x030B}, 0x0170}, // 'U' + combining -> 'Ű' - {{0x0055, 0x030C}, 0x01D3}, // 'U' + combining -> 'Ǔ' - {{0x0055, 0x030F}, 0x0214}, // 'U' + combining -> 'Ȕ' - {{0x0055, 0x0311}, 0x0216}, // 'U' + combining -> 'Ȗ' - {{0x0055, 0x031B}, 0x01AF}, // 'U' + combining -> 'Ư' - {{0x0055, 0x0323}, 0x1EE4}, // 'U' + combining -> 'Ụ' - {{0x0055, 0x0324}, 0x1E72}, // 'U' + combining -> 'Ṳ' - {{0x0055, 0x0328}, 0x0172}, // 'U' + combining -> 'Ų' - {{0x0055, 0x032D}, 0x1E76}, // 'U' + combining -> 'Ṷ' - {{0x0055, 0x0330}, 0x1E74}, // 'U' + combining -> 'Ṵ' - {{0x0056, 0x0303}, 0x1E7C}, // 'V' + combining -> 'Ṽ' - {{0x0056, 0x0323}, 0x1E7E}, // 'V' + combining -> 'Ṿ' - {{0x0057, 0x0300}, 0x1E80}, // 'W' + combining -> 'Ẁ' - {{0x0057, 0x0301}, 0x1E82}, // 'W' + combining -> 'Ẃ' - {{0x0057, 0x0302}, 0x0174}, // 'W' + combining -> 'Ŵ' - {{0x0057, 0x0307}, 0x1E86}, // 'W' + combining -> 'Ẇ' - {{0x0057, 0x0308}, 0x1E84}, // 'W' + combining -> 'Ẅ' - {{0x0057, 0x0323}, 0x1E88}, // 'W' + combining -> 'Ẉ' - {{0x0058, 0x0307}, 0x1E8A}, // 'X' + combining -> 'Ẋ' - {{0x0058, 0x0308}, 0x1E8C}, // 'X' + combining -> 'Ẍ' - {{0x0059, 0x0300}, 0x1EF2}, // 'Y' + combining -> 'Ỳ' - {{0x0059, 0x0301}, 0x00DD}, // 'Y' + combining -> 'Ý' - {{0x0059, 0x0302}, 0x0176}, // 'Y' + combining -> 'Ŷ' - {{0x0059, 0x0303}, 0x1EF8}, // 'Y' + combining -> 'Ỹ' - {{0x0059, 0x0304}, 0x0232}, // 'Y' + combining -> 'Ȳ' - {{0x0059, 0x0307}, 0x1E8E}, // 'Y' + combining -> 'Ẏ' - {{0x0059, 0x0308}, 0x0178}, // 'Y' + combining -> 'Ÿ' - {{0x0059, 0x0309}, 0x1EF6}, // 'Y' + combining -> 'Ỷ' - {{0x0059, 0x0323}, 0x1EF4}, // 'Y' + combining -> 'Ỵ' - {{0x005A, 0x0301}, 0x0179}, // 'Z' + combining -> 'Ź' - {{0x005A, 0x0302}, 0x1E90}, // 'Z' + combining -> 'Ẑ' - {{0x005A, 0x0307}, 0x017B}, // 'Z' + combining -> 'Ż' - {{0x005A, 0x030C}, 0x017D}, // 'Z' + combining -> 'Ž' - {{0x005A, 0x0323}, 0x1E92}, // 'Z' + combining -> 'Ẓ' - {{0x005A, 0x0331}, 0x1E94}, // 'Z' + combining -> 'Ẕ' - {{0x0061, 0x0300}, 0x00E0}, // 'a' + combining -> 'à' - {{0x0061, 0x0301}, 0x00E1}, // 'a' + combining -> 'á' - {{0x0061, 0x0302}, 0x00E2}, // 'a' + combining -> 'â' - {{0x0061, 0x0303}, 0x00E3}, // 'a' + combining -> 'ã' - {{0x0061, 0x0304}, 0x0101}, // 'a' + combining -> 'ā' - {{0x0061, 0x0306}, 0x0103}, // 'a' + combining -> 'ă' - {{0x0061, 0x0307}, 0x0227}, // 'a' + combining -> 'ȧ' - {{0x0061, 0x0308}, 0x00E4}, // 'a' + combining -> 'ä' - {{0x0061, 0x0309}, 0x1EA3}, // 'a' + combining -> 'ả' - {{0x0061, 0x030A}, 0x00E5}, // 'a' + combining -> 'å' - {{0x0061, 0x030C}, 0x01CE}, // 'a' + combining -> 'ǎ' - {{0x0061, 0x030F}, 0x0201}, // 'a' + combining -> 'ȁ' - {{0x0061, 0x0311}, 0x0203}, // 'a' + combining -> 'ȃ' - {{0x0061, 0x0323}, 0x1EA1}, // 'a' + combining -> 'ạ' - {{0x0061, 0x0325}, 0x1E01}, // 'a' + combining -> 'ḁ' - {{0x0061, 0x0328}, 0x0105}, // 'a' + combining -> 'ą' - {{0x0062, 0x0307}, 0x1E03}, // 'b' + combining -> 'ḃ' - {{0x0062, 0x0323}, 0x1E05}, // 'b' + combining -> 'ḅ' - {{0x0062, 0x0331}, 0x1E07}, // 'b' + combining -> 'ḇ' - {{0x0063, 0x0301}, 0x0107}, // 'c' + combining -> 'ć' - {{0x0063, 0x0302}, 0x0109}, // 'c' + combining -> 'ĉ' - {{0x0063, 0x0307}, 0x010B}, // 'c' + combining -> 'ċ' - {{0x0063, 0x030C}, 0x010D}, // 'c' + combining -> 'č' - {{0x0063, 0x0327}, 0x00E7}, // 'c' + combining -> 'ç' - {{0x0064, 0x0307}, 0x1E0B}, // 'd' + combining -> 'ḋ' - {{0x0064, 0x030C}, 0x010F}, // 'd' + combining -> 'ď' - {{0x0064, 0x0323}, 0x1E0D}, // 'd' + combining -> 'ḍ' - {{0x0064, 0x0327}, 0x1E11}, // 'd' + combining -> 'ḑ' - {{0x0064, 0x032D}, 0x1E13}, // 'd' + combining -> 'ḓ' - {{0x0064, 0x0331}, 0x1E0F}, // 'd' + combining -> 'ḏ' - {{0x0065, 0x0300}, 0x00E8}, // 'e' + combining -> 'è' - {{0x0065, 0x0301}, 0x00E9}, // 'e' + combining -> 'é' - {{0x0065, 0x0302}, 0x00EA}, // 'e' + combining -> 'ê' - {{0x0065, 0x0303}, 0x1EBD}, // 'e' + combining -> 'ẽ' - {{0x0065, 0x0304}, 0x0113}, // 'e' + combining -> 'ē' - {{0x0065, 0x0306}, 0x0115}, // 'e' + combining -> 'ĕ' - {{0x0065, 0x0307}, 0x0117}, // 'e' + combining -> 'ė' - {{0x0065, 0x0308}, 0x00EB}, // 'e' + combining -> 'ë' - {{0x0065, 0x0309}, 0x1EBB}, // 'e' + combining -> 'ẻ' - {{0x0065, 0x030C}, 0x011B}, // 'e' + combining -> 'ě' - {{0x0065, 0x030F}, 0x0205}, // 'e' + combining -> 'ȅ' - {{0x0065, 0x0311}, 0x0207}, // 'e' + combining -> 'ȇ' - {{0x0065, 0x0323}, 0x1EB9}, // 'e' + combining -> 'ẹ' - {{0x0065, 0x0327}, 0x0229}, // 'e' + combining -> 'ȩ' - {{0x0065, 0x0328}, 0x0119}, // 'e' + combining -> 'ę' - {{0x0065, 0x032D}, 0x1E19}, // 'e' + combining -> 'ḙ' - {{0x0065, 0x0330}, 0x1E1B}, // 'e' + combining -> 'ḛ' - {{0x0066, 0x0307}, 0x1E1F}, // 'f' + combining -> 'ḟ' - {{0x0067, 0x0301}, 0x01F5}, // 'g' + combining -> 'ǵ' - {{0x0067, 0x0302}, 0x011D}, // 'g' + combining -> 'ĝ' - {{0x0067, 0x0304}, 0x1E21}, // 'g' + combining -> 'ḡ' - {{0x0067, 0x0306}, 0x011F}, // 'g' + combining -> 'ğ' - {{0x0067, 0x0307}, 0x0121}, // 'g' + combining -> 'ġ' - {{0x0067, 0x030C}, 0x01E7}, // 'g' + combining -> 'ǧ' - {{0x0067, 0x0327}, 0x0123}, // 'g' + combining -> 'ģ' - {{0x0068, 0x0302}, 0x0125}, // 'h' + combining -> 'ĥ' - {{0x0068, 0x0307}, 0x1E23}, // 'h' + combining -> 'ḣ' - {{0x0068, 0x0308}, 0x1E27}, // 'h' + combining -> 'ḧ' - {{0x0068, 0x030C}, 0x021F}, // 'h' + combining -> 'ȟ' - {{0x0068, 0x0323}, 0x1E25}, // 'h' + combining -> 'ḥ' - {{0x0068, 0x0327}, 0x1E29}, // 'h' + combining -> 'ḩ' - {{0x0068, 0x032E}, 0x1E2B}, // 'h' + combining -> 'ḫ' - {{0x0068, 0x0331}, 0x1E96}, // 'h' + combining -> 'ẖ' - {{0x0069, 0x0300}, 0x00EC}, // 'i' + combining -> 'ì' - {{0x0069, 0x0301}, 0x00ED}, // 'i' + combining -> 'í' - {{0x0069, 0x0302}, 0x00EE}, // 'i' + combining -> 'î' - {{0x0069, 0x0303}, 0x0129}, // 'i' + combining -> 'ĩ' - {{0x0069, 0x0304}, 0x012B}, // 'i' + combining -> 'ī' - {{0x0069, 0x0306}, 0x012D}, // 'i' + combining -> 'ĭ' - {{0x0069, 0x0308}, 0x00EF}, // 'i' + combining -> 'ï' - {{0x0069, 0x0309}, 0x1EC9}, // 'i' + combining -> 'ỉ' - {{0x0069, 0x030C}, 0x01D0}, // 'i' + combining -> 'ǐ' - {{0x0069, 0x030F}, 0x0209}, // 'i' + combining -> 'ȉ' - {{0x0069, 0x0311}, 0x020B}, // 'i' + combining -> 'ȋ' - {{0x0069, 0x0323}, 0x1ECB}, // 'i' + combining -> 'ị' - {{0x0069, 0x0328}, 0x012F}, // 'i' + combining -> 'į' - {{0x0069, 0x0330}, 0x1E2D}, // 'i' + combining -> 'ḭ' - {{0x006A, 0x0302}, 0x0135}, // 'j' + combining -> 'ĵ' - {{0x006A, 0x030C}, 0x01F0}, // 'j' + combining -> 'ǰ' - {{0x006B, 0x0301}, 0x1E31}, // 'k' + combining -> 'ḱ' - {{0x006B, 0x030C}, 0x01E9}, // 'k' + combining -> 'ǩ' - {{0x006B, 0x0323}, 0x1E33}, // 'k' + combining -> 'ḳ' - {{0x006B, 0x0327}, 0x0137}, // 'k' + combining -> 'ķ' - {{0x006B, 0x0331}, 0x1E35}, // 'k' + combining -> 'ḵ' - {{0x006C, 0x0301}, 0x013A}, // 'l' + combining -> 'ĺ' - {{0x006C, 0x030C}, 0x013E}, // 'l' + combining -> 'ľ' - {{0x006C, 0x0323}, 0x1E37}, // 'l' + combining -> 'ḷ' - {{0x006C, 0x0327}, 0x013C}, // 'l' + combining -> 'ļ' - {{0x006C, 0x032D}, 0x1E3D}, // 'l' + combining -> 'ḽ' - {{0x006C, 0x0331}, 0x1E3B}, // 'l' + combining -> 'ḻ' - {{0x006D, 0x0301}, 0x1E3F}, // 'm' + combining -> 'ḿ' - {{0x006D, 0x0307}, 0x1E41}, // 'm' + combining -> 'ṁ' - {{0x006D, 0x0323}, 0x1E43}, // 'm' + combining -> 'ṃ' - {{0x006E, 0x0300}, 0x01F9}, // 'n' + combining -> 'ǹ' - {{0x006E, 0x0301}, 0x0144}, // 'n' + combining -> 'ń' - {{0x006E, 0x0303}, 0x00F1}, // 'n' + combining -> 'ñ' - {{0x006E, 0x0307}, 0x1E45}, // 'n' + combining -> 'ṅ' - {{0x006E, 0x030C}, 0x0148}, // 'n' + combining -> 'ň' - {{0x006E, 0x0323}, 0x1E47}, // 'n' + combining -> 'ṇ' - {{0x006E, 0x0327}, 0x0146}, // 'n' + combining -> 'ņ' - {{0x006E, 0x032D}, 0x1E4B}, // 'n' + combining -> 'ṋ' - {{0x006E, 0x0331}, 0x1E49}, // 'n' + combining -> 'ṉ' - {{0x006F, 0x0300}, 0x00F2}, // 'o' + combining -> 'ò' - {{0x006F, 0x0301}, 0x00F3}, // 'o' + combining -> 'ó' - {{0x006F, 0x0302}, 0x00F4}, // 'o' + combining -> 'ô' - {{0x006F, 0x0303}, 0x00F5}, // 'o' + combining -> 'õ' - {{0x006F, 0x0304}, 0x014D}, // 'o' + combining -> 'ō' - {{0x006F, 0x0306}, 0x014F}, // 'o' + combining -> 'ŏ' - {{0x006F, 0x0307}, 0x022F}, // 'o' + combining -> 'ȯ' - {{0x006F, 0x0308}, 0x00F6}, // 'o' + combining -> 'ö' - {{0x006F, 0x0309}, 0x1ECF}, // 'o' + combining -> 'ỏ' - {{0x006F, 0x030B}, 0x0151}, // 'o' + combining -> 'ő' - {{0x006F, 0x030C}, 0x01D2}, // 'o' + combining -> 'ǒ' - {{0x006F, 0x030F}, 0x020D}, // 'o' + combining -> 'ȍ' - {{0x006F, 0x0311}, 0x020F}, // 'o' + combining -> 'ȏ' - {{0x006F, 0x031B}, 0x01A1}, // 'o' + combining -> 'ơ' - {{0x006F, 0x0323}, 0x1ECD}, // 'o' + combining -> 'ọ' - {{0x006F, 0x0328}, 0x01EB}, // 'o' + combining -> 'ǫ' - {{0x0070, 0x0301}, 0x1E55}, // 'p' + combining -> 'ṕ' - {{0x0070, 0x0307}, 0x1E57}, // 'p' + combining -> 'ṗ' - {{0x0072, 0x0301}, 0x0155}, // 'r' + combining -> 'ŕ' - {{0x0072, 0x0307}, 0x1E59}, // 'r' + combining -> 'ṙ' - {{0x0072, 0x030C}, 0x0159}, // 'r' + combining -> 'ř' - {{0x0072, 0x030F}, 0x0211}, // 'r' + combining -> 'ȑ' - {{0x0072, 0x0311}, 0x0213}, // 'r' + combining -> 'ȓ' - {{0x0072, 0x0323}, 0x1E5B}, // 'r' + combining -> 'ṛ' - {{0x0072, 0x0327}, 0x0157}, // 'r' + combining -> 'ŗ' - {{0x0072, 0x0331}, 0x1E5F}, // 'r' + combining -> 'ṟ' - {{0x0073, 0x0301}, 0x015B}, // 's' + combining -> 'ś' - {{0x0073, 0x0302}, 0x015D}, // 's' + combining -> 'ŝ' - {{0x0073, 0x0307}, 0x1E61}, // 's' + combining -> 'ṡ' - {{0x0073, 0x030C}, 0x0161}, // 's' + combining -> 'š' - {{0x0073, 0x0323}, 0x1E63}, // 's' + combining -> 'ṣ' - {{0x0073, 0x0326}, 0x0219}, // 's' + combining -> 'ș' - {{0x0073, 0x0327}, 0x015F}, // 's' + combining -> 'ş' - {{0x0074, 0x0307}, 0x1E6B}, // 't' + combining -> 'ṫ' - {{0x0074, 0x0308}, 0x1E97}, // 't' + combining -> 'ẗ' - {{0x0074, 0x030C}, 0x0165}, // 't' + combining -> 'ť' - {{0x0074, 0x0323}, 0x1E6D}, // 't' + combining -> 'ṭ' - {{0x0074, 0x0326}, 0x021B}, // 't' + combining -> 'ț' - {{0x0074, 0x0327}, 0x0163}, // 't' + combining -> 'ţ' - {{0x0074, 0x032D}, 0x1E71}, // 't' + combining -> 'ṱ' - {{0x0074, 0x0331}, 0x1E6F}, // 't' + combining -> 'ṯ' - {{0x0075, 0x0300}, 0x00F9}, // 'u' + combining -> 'ù' - {{0x0075, 0x0301}, 0x00FA}, // 'u' + combining -> 'ú' - {{0x0075, 0x0302}, 0x00FB}, // 'u' + combining -> 'û' - {{0x0075, 0x0303}, 0x0169}, // 'u' + combining -> 'ũ' - {{0x0075, 0x0304}, 0x016B}, // 'u' + combining -> 'ū' - {{0x0075, 0x0306}, 0x016D}, // 'u' + combining -> 'ŭ' - {{0x0075, 0x0308}, 0x00FC}, // 'u' + combining -> 'ü' - {{0x0075, 0x0309}, 0x1EE7}, // 'u' + combining -> 'ủ' - {{0x0075, 0x030A}, 0x016F}, // 'u' + combining -> 'ů' - {{0x0075, 0x030B}, 0x0171}, // 'u' + combining -> 'ű' - {{0x0075, 0x030C}, 0x01D4}, // 'u' + combining -> 'ǔ' - {{0x0075, 0x030F}, 0x0215}, // 'u' + combining -> 'ȕ' - {{0x0075, 0x0311}, 0x0217}, // 'u' + combining -> 'ȗ' - {{0x0075, 0x031B}, 0x01B0}, // 'u' + combining -> 'ư' - {{0x0075, 0x0323}, 0x1EE5}, // 'u' + combining -> 'ụ' - {{0x0075, 0x0324}, 0x1E73}, // 'u' + combining -> 'ṳ' - {{0x0075, 0x0328}, 0x0173}, // 'u' + combining -> 'ų' - {{0x0075, 0x032D}, 0x1E77}, // 'u' + combining -> 'ṷ' - {{0x0075, 0x0330}, 0x1E75}, // 'u' + combining -> 'ṵ' - {{0x0076, 0x0303}, 0x1E7D}, // 'v' + combining -> 'ṽ' - {{0x0076, 0x0323}, 0x1E7F}, // 'v' + combining -> 'ṿ' - {{0x0077, 0x0300}, 0x1E81}, // 'w' + combining -> 'ẁ' - {{0x0077, 0x0301}, 0x1E83}, // 'w' + combining -> 'ẃ' - {{0x0077, 0x0302}, 0x0175}, // 'w' + combining -> 'ŵ' - {{0x0077, 0x0307}, 0x1E87}, // 'w' + combining -> 'ẇ' - {{0x0077, 0x0308}, 0x1E85}, // 'w' + combining -> 'ẅ' - {{0x0077, 0x030A}, 0x1E98}, // 'w' + combining -> 'ẘ' - {{0x0077, 0x0323}, 0x1E89}, // 'w' + combining -> 'ẉ' - {{0x0078, 0x0307}, 0x1E8B}, // 'x' + combining -> 'ẋ' - {{0x0078, 0x0308}, 0x1E8D}, // 'x' + combining -> 'ẍ' - {{0x0079, 0x0300}, 0x1EF3}, // 'y' + combining -> 'ỳ' - {{0x0079, 0x0301}, 0x00FD}, // 'y' + combining -> 'ý' - {{0x0079, 0x0302}, 0x0177}, // 'y' + combining -> 'ŷ' - {{0x0079, 0x0303}, 0x1EF9}, // 'y' + combining -> 'ỹ' - {{0x0079, 0x0304}, 0x0233}, // 'y' + combining -> 'ȳ' - {{0x0079, 0x0307}, 0x1E8F}, // 'y' + combining -> 'ẏ' - {{0x0079, 0x0308}, 0x00FF}, // 'y' + combining -> 'ÿ' - {{0x0079, 0x0309}, 0x1EF7}, // 'y' + combining -> 'ỷ' - {{0x0079, 0x030A}, 0x1E99}, // 'y' + combining -> 'ẙ' - {{0x0079, 0x0323}, 0x1EF5}, // 'y' + combining -> 'ỵ' - {{0x007A, 0x0301}, 0x017A}, // 'z' + combining -> 'ź' - {{0x007A, 0x0302}, 0x1E91}, // 'z' + combining -> 'ẑ' - {{0x007A, 0x0307}, 0x017C}, // 'z' + combining -> 'ż' - {{0x007A, 0x030C}, 0x017E}, // 'z' + combining -> 'ž' - {{0x007A, 0x0323}, 0x1E93}, // 'z' + combining -> 'ẓ' - {{0x007A, 0x0331}, 0x1E95}, // 'z' + combining -> 'ẕ' - {{0x00A8, 0x0300}, 0x1FED}, // '¨' + combining -> '῭' - {{0x00A8, 0x0301}, 0x0385}, // '¨' + combining -> '΅' - {{0x00A8, 0x0342}, 0x1FC1}, // '¨' + combining -> '῁' - {{0x00C2, 0x0300}, 0x1EA6}, // 'Â' + combining -> 'Ầ' - {{0x00C2, 0x0301}, 0x1EA4}, // 'Â' + combining -> 'Ấ' - {{0x00C2, 0x0303}, 0x1EAA}, // 'Â' + combining -> 'Ẫ' - {{0x00C2, 0x0309}, 0x1EA8}, // 'Â' + combining -> 'Ẩ' - {{0x00C4, 0x0304}, 0x01DE}, // 'Ä' + combining -> 'Ǟ' - {{0x00C5, 0x0301}, 0x01FA}, // 'Å' + combining -> 'Ǻ' - {{0x00C6, 0x0301}, 0x01FC}, // 'Æ' + combining -> 'Ǽ' - {{0x00C6, 0x0304}, 0x01E2}, // 'Æ' + combining -> 'Ǣ' - {{0x00C7, 0x0301}, 0x1E08}, // 'Ç' + combining -> 'Ḉ' - {{0x00CA, 0x0300}, 0x1EC0}, // 'Ê' + combining -> 'Ề' - {{0x00CA, 0x0301}, 0x1EBE}, // 'Ê' + combining -> 'Ế' - {{0x00CA, 0x0303}, 0x1EC4}, // 'Ê' + combining -> 'Ễ' - {{0x00CA, 0x0309}, 0x1EC2}, // 'Ê' + combining -> 'Ể' - {{0x00CF, 0x0301}, 0x1E2E}, // 'Ï' + combining -> 'Ḯ' - {{0x00D4, 0x0300}, 0x1ED2}, // 'Ô' + combining -> 'Ồ' - {{0x00D4, 0x0301}, 0x1ED0}, // 'Ô' + combining -> 'Ố' - {{0x00D4, 0x0303}, 0x1ED6}, // 'Ô' + combining -> 'Ỗ' - {{0x00D4, 0x0309}, 0x1ED4}, // 'Ô' + combining -> 'Ổ' - {{0x00D5, 0x0301}, 0x1E4C}, // 'Õ' + combining -> 'Ṍ' - {{0x00D5, 0x0304}, 0x022C}, // 'Õ' + combining -> 'Ȭ' - {{0x00D5, 0x0308}, 0x1E4E}, // 'Õ' + combining -> 'Ṏ' - {{0x00D6, 0x0304}, 0x022A}, // 'Ö' + combining -> 'Ȫ' - {{0x00D8, 0x0301}, 0x01FE}, // 'Ø' + combining -> 'Ǿ' - {{0x00DC, 0x0300}, 0x01DB}, // 'Ü' + combining -> 'Ǜ' - {{0x00DC, 0x0301}, 0x01D7}, // 'Ü' + combining -> 'Ǘ' - {{0x00DC, 0x0304}, 0x01D5}, // 'Ü' + combining -> 'Ǖ' - {{0x00DC, 0x030C}, 0x01D9}, // 'Ü' + combining -> 'Ǚ' - {{0x00E2, 0x0300}, 0x1EA7}, // 'â' + combining -> 'ầ' - {{0x00E2, 0x0301}, 0x1EA5}, // 'â' + combining -> 'ấ' - {{0x00E2, 0x0303}, 0x1EAB}, // 'â' + combining -> 'ẫ' - {{0x00E2, 0x0309}, 0x1EA9}, // 'â' + combining -> 'ẩ' - {{0x00E4, 0x0304}, 0x01DF}, // 'ä' + combining -> 'ǟ' - {{0x00E5, 0x0301}, 0x01FB}, // 'å' + combining -> 'ǻ' - {{0x00E6, 0x0301}, 0x01FD}, // 'æ' + combining -> 'ǽ' - {{0x00E6, 0x0304}, 0x01E3}, // 'æ' + combining -> 'ǣ' - {{0x00E7, 0x0301}, 0x1E09}, // 'ç' + combining -> 'ḉ' - {{0x00EA, 0x0300}, 0x1EC1}, // 'ê' + combining -> 'ề' - {{0x00EA, 0x0301}, 0x1EBF}, // 'ê' + combining -> 'ế' - {{0x00EA, 0x0303}, 0x1EC5}, // 'ê' + combining -> 'ễ' - {{0x00EA, 0x0309}, 0x1EC3}, // 'ê' + combining -> 'ể' - {{0x00EF, 0x0301}, 0x1E2F}, // 'ï' + combining -> 'ḯ' - {{0x00F4, 0x0300}, 0x1ED3}, // 'ô' + combining -> 'ồ' - {{0x00F4, 0x0301}, 0x1ED1}, // 'ô' + combining -> 'ố' - {{0x00F4, 0x0303}, 0x1ED7}, // 'ô' + combining -> 'ỗ' - {{0x00F4, 0x0309}, 0x1ED5}, // 'ô' + combining -> 'ổ' - {{0x00F5, 0x0301}, 0x1E4D}, // 'õ' + combining -> 'ṍ' - {{0x00F5, 0x0304}, 0x022D}, // 'õ' + combining -> 'ȭ' - {{0x00F5, 0x0308}, 0x1E4F}, // 'õ' + combining -> 'ṏ' - {{0x00F6, 0x0304}, 0x022B}, // 'ö' + combining -> 'ȫ' - {{0x00F8, 0x0301}, 0x01FF}, // 'ø' + combining -> 'ǿ' - {{0x00FC, 0x0300}, 0x01DC}, // 'ü' + combining -> 'ǜ' - {{0x00FC, 0x0301}, 0x01D8}, // 'ü' + combining -> 'ǘ' - {{0x00FC, 0x0304}, 0x01D6}, // 'ü' + combining -> 'ǖ' - {{0x00FC, 0x030C}, 0x01DA}, // 'ü' + combining -> 'ǚ' - {{0x0102, 0x0300}, 0x1EB0}, // 'Ă' + combining -> 'Ằ' - {{0x0102, 0x0301}, 0x1EAE}, // 'Ă' + combining -> 'Ắ' - {{0x0102, 0x0303}, 0x1EB4}, // 'Ă' + combining -> 'Ẵ' - {{0x0102, 0x0309}, 0x1EB2}, // 'Ă' + combining -> 'Ẳ' - {{0x0103, 0x0300}, 0x1EB1}, // 'ă' + combining -> 'ằ' - {{0x0103, 0x0301}, 0x1EAF}, // 'ă' + combining -> 'ắ' - {{0x0103, 0x0303}, 0x1EB5}, // 'ă' + combining -> 'ẵ' - {{0x0103, 0x0309}, 0x1EB3}, // 'ă' + combining -> 'ẳ' - {{0x0112, 0x0300}, 0x1E14}, // 'Ē' + combining -> 'Ḕ' - {{0x0112, 0x0301}, 0x1E16}, // 'Ē' + combining -> 'Ḗ' - {{0x0113, 0x0300}, 0x1E15}, // 'ē' + combining -> 'ḕ' - {{0x0113, 0x0301}, 0x1E17}, // 'ē' + combining -> 'ḗ' - {{0x014C, 0x0300}, 0x1E50}, // 'Ō' + combining -> 'Ṑ' - {{0x014C, 0x0301}, 0x1E52}, // 'Ō' + combining -> 'Ṓ' - {{0x014D, 0x0300}, 0x1E51}, // 'ō' + combining -> 'ṑ' - {{0x014D, 0x0301}, 0x1E53}, // 'ō' + combining -> 'ṓ' - {{0x015A, 0x0307}, 0x1E64}, // 'Ś' + combining -> 'Ṥ' - {{0x015B, 0x0307}, 0x1E65}, // 'ś' + combining -> 'ṥ' - {{0x0160, 0x0307}, 0x1E66}, // 'Š' + combining -> 'Ṧ' - {{0x0161, 0x0307}, 0x1E67}, // 'š' + combining -> 'ṧ' - {{0x0168, 0x0301}, 0x1E78}, // 'Ũ' + combining -> 'Ṹ' - {{0x0169, 0x0301}, 0x1E79}, // 'ũ' + combining -> 'ṹ' - {{0x016A, 0x0308}, 0x1E7A}, // 'Ū' + combining -> 'Ṻ' - {{0x016B, 0x0308}, 0x1E7B}, // 'ū' + combining -> 'ṻ' - {{0x017F, 0x0307}, 0x1E9B}, // 'ſ' + combining -> 'ẛ' - {{0x01A0, 0x0300}, 0x1EDC}, // 'Ơ' + combining -> 'Ờ' - {{0x01A0, 0x0301}, 0x1EDA}, // 'Ơ' + combining -> 'Ớ' - {{0x01A0, 0x0303}, 0x1EE0}, // 'Ơ' + combining -> 'Ỡ' - {{0x01A0, 0x0309}, 0x1EDE}, // 'Ơ' + combining -> 'Ở' - {{0x01A0, 0x0323}, 0x1EE2}, // 'Ơ' + combining -> 'Ợ' - {{0x01A1, 0x0300}, 0x1EDD}, // 'ơ' + combining -> 'ờ' - {{0x01A1, 0x0301}, 0x1EDB}, // 'ơ' + combining -> 'ớ' - {{0x01A1, 0x0303}, 0x1EE1}, // 'ơ' + combining -> 'ỡ' - {{0x01A1, 0x0309}, 0x1EDF}, // 'ơ' + combining -> 'ở' - {{0x01A1, 0x0323}, 0x1EE3}, // 'ơ' + combining -> 'ợ' - {{0x01AF, 0x0300}, 0x1EEA}, // 'Ư' + combining -> 'Ừ' - {{0x01AF, 0x0301}, 0x1EE8}, // 'Ư' + combining -> 'Ứ' - {{0x01AF, 0x0303}, 0x1EEE}, // 'Ư' + combining -> 'Ữ' - {{0x01AF, 0x0309}, 0x1EEC}, // 'Ư' + combining -> 'Ử' - {{0x01AF, 0x0323}, 0x1EF0}, // 'Ư' + combining -> 'Ự' - {{0x01B0, 0x0300}, 0x1EEB}, // 'ư' + combining -> 'ừ' - {{0x01B0, 0x0301}, 0x1EE9}, // 'ư' + combining -> 'ứ' - {{0x01B0, 0x0303}, 0x1EEF}, // 'ư' + combining -> 'ữ' - {{0x01B0, 0x0309}, 0x1EED}, // 'ư' + combining -> 'ử' - {{0x01B0, 0x0323}, 0x1EF1}, // 'ư' + combining -> 'ự' - {{0x01B7, 0x030C}, 0x01EE}, // 'Ʒ' + combining -> 'Ǯ' - {{0x01EA, 0x0304}, 0x01EC}, // 'Ǫ' + combining -> 'Ǭ' - {{0x01EB, 0x0304}, 0x01ED}, // 'ǫ' + combining -> 'ǭ' - {{0x0226, 0x0304}, 0x01E0}, // 'Ȧ' + combining -> 'Ǡ' - {{0x0227, 0x0304}, 0x01E1}, // 'ȧ' + combining -> 'ǡ' - {{0x0228, 0x0306}, 0x1E1C}, // 'Ȩ' + combining -> 'Ḝ' - {{0x0229, 0x0306}, 0x1E1D}, // 'ȩ' + combining -> 'ḝ' - {{0x022E, 0x0304}, 0x0230}, // 'Ȯ' + combining -> 'Ȱ' - {{0x022F, 0x0304}, 0x0231}, // 'ȯ' + combining -> 'ȱ' - {{0x0292, 0x030C}, 0x01EF}, // 'ʒ' + combining -> 'ǯ' - {{0x0308, 0x0301}, 0x0344}, // '̈' + combining -> '̈́' - {{0x0391, 0x0300}, 0x1FBA}, // 'Α' + combining -> 'Ὰ' - {{0x0391, 0x0301}, 0x0386}, // 'Α' + combining -> 'Ά' - {{0x0391, 0x0304}, 0x1FB9}, // 'Α' + combining -> 'Ᾱ' - {{0x0391, 0x0306}, 0x1FB8}, // 'Α' + combining -> 'Ᾰ' - {{0x0391, 0x0313}, 0x1F08}, // 'Α' + combining -> 'Ἀ' - {{0x0391, 0x0314}, 0x1F09}, // 'Α' + combining -> 'Ἁ' - {{0x0391, 0x0345}, 0x1FBC}, // 'Α' + combining -> 'ᾼ' - {{0x0395, 0x0300}, 0x1FC8}, // 'Ε' + combining -> 'Ὲ' - {{0x0395, 0x0301}, 0x0388}, // 'Ε' + combining -> 'Έ' - {{0x0395, 0x0313}, 0x1F18}, // 'Ε' + combining -> 'Ἐ' - {{0x0395, 0x0314}, 0x1F19}, // 'Ε' + combining -> 'Ἑ' - {{0x0397, 0x0300}, 0x1FCA}, // 'Η' + combining -> 'Ὴ' - {{0x0397, 0x0301}, 0x0389}, // 'Η' + combining -> 'Ή' - {{0x0397, 0x0313}, 0x1F28}, // 'Η' + combining -> 'Ἠ' - {{0x0397, 0x0314}, 0x1F29}, // 'Η' + combining -> 'Ἡ' - {{0x0397, 0x0345}, 0x1FCC}, // 'Η' + combining -> 'ῌ' - {{0x0399, 0x0300}, 0x1FDA}, // 'Ι' + combining -> 'Ὶ' - {{0x0399, 0x0301}, 0x038A}, // 'Ι' + combining -> 'Ί' - {{0x0399, 0x0304}, 0x1FD9}, // 'Ι' + combining -> 'Ῑ' - {{0x0399, 0x0306}, 0x1FD8}, // 'Ι' + combining -> 'Ῐ' - {{0x0399, 0x0308}, 0x03AA}, // 'Ι' + combining -> 'Ϊ' - {{0x0399, 0x0313}, 0x1F38}, // 'Ι' + combining -> 'Ἰ' - {{0x0399, 0x0314}, 0x1F39}, // 'Ι' + combining -> 'Ἱ' - {{0x039F, 0x0300}, 0x1FF8}, // 'Ο' + combining -> 'Ὸ' - {{0x039F, 0x0301}, 0x038C}, // 'Ο' + combining -> 'Ό' - {{0x039F, 0x0313}, 0x1F48}, // 'Ο' + combining -> 'Ὀ' - {{0x039F, 0x0314}, 0x1F49}, // 'Ο' + combining -> 'Ὁ' - {{0x03A1, 0x0314}, 0x1FEC}, // 'Ρ' + combining -> 'Ῥ' - {{0x03A5, 0x0300}, 0x1FEA}, // 'Υ' + combining -> 'Ὺ' - {{0x03A5, 0x0301}, 0x038E}, // 'Υ' + combining -> 'Ύ' - {{0x03A5, 0x0304}, 0x1FE9}, // 'Υ' + combining -> 'Ῡ' - {{0x03A5, 0x0306}, 0x1FE8}, // 'Υ' + combining -> 'Ῠ' - {{0x03A5, 0x0308}, 0x03AB}, // 'Υ' + combining -> 'Ϋ' - {{0x03A5, 0x0314}, 0x1F59}, // 'Υ' + combining -> 'Ὑ' - {{0x03A9, 0x0300}, 0x1FFA}, // 'Ω' + combining -> 'Ὼ' - {{0x03A9, 0x0301}, 0x038F}, // 'Ω' + combining -> 'Ώ' - {{0x03A9, 0x0313}, 0x1F68}, // 'Ω' + combining -> 'Ὠ' - {{0x03A9, 0x0314}, 0x1F69}, // 'Ω' + combining -> 'Ὡ' - {{0x03A9, 0x0345}, 0x1FFC}, // 'Ω' + combining -> 'ῼ' - {{0x03AC, 0x0345}, 0x1FB4}, // 'ά' + combining -> 'ᾴ' - {{0x03AE, 0x0345}, 0x1FC4}, // 'ή' + combining -> 'ῄ' - {{0x03B1, 0x0300}, 0x1F70}, // 'α' + combining -> 'ὰ' - {{0x03B1, 0x0301}, 0x03AC}, // 'α' + combining -> 'ά' - {{0x03B1, 0x0304}, 0x1FB1}, // 'α' + combining -> 'ᾱ' - {{0x03B1, 0x0306}, 0x1FB0}, // 'α' + combining -> 'ᾰ' - {{0x03B1, 0x0313}, 0x1F00}, // 'α' + combining -> 'ἀ' - {{0x03B1, 0x0314}, 0x1F01}, // 'α' + combining -> 'ἁ' - {{0x03B1, 0x0342}, 0x1FB6}, // 'α' + combining -> 'ᾶ' - {{0x03B1, 0x0345}, 0x1FB3}, // 'α' + combining -> 'ᾳ' - {{0x03B5, 0x0300}, 0x1F72}, // 'ε' + combining -> 'ὲ' - {{0x03B5, 0x0301}, 0x03AD}, // 'ε' + combining -> 'έ' - {{0x03B5, 0x0313}, 0x1F10}, // 'ε' + combining -> 'ἐ' - {{0x03B5, 0x0314}, 0x1F11}, // 'ε' + combining -> 'ἑ' - {{0x03B7, 0x0300}, 0x1F74}, // 'η' + combining -> 'ὴ' - {{0x03B7, 0x0301}, 0x03AE}, // 'η' + combining -> 'ή' - {{0x03B7, 0x0313}, 0x1F20}, // 'η' + combining -> 'ἠ' - {{0x03B7, 0x0314}, 0x1F21}, // 'η' + combining -> 'ἡ' - {{0x03B7, 0x0342}, 0x1FC6}, // 'η' + combining -> 'ῆ' - {{0x03B7, 0x0345}, 0x1FC3}, // 'η' + combining -> 'ῃ' - {{0x03B9, 0x0300}, 0x1F76}, // 'ι' + combining -> 'ὶ' - {{0x03B9, 0x0301}, 0x03AF}, // 'ι' + combining -> 'ί' - {{0x03B9, 0x0304}, 0x1FD1}, // 'ι' + combining -> 'ῑ' - {{0x03B9, 0x0306}, 0x1FD0}, // 'ι' + combining -> 'ῐ' - {{0x03B9, 0x0308}, 0x03CA}, // 'ι' + combining -> 'ϊ' - {{0x03B9, 0x0313}, 0x1F30}, // 'ι' + combining -> 'ἰ' - {{0x03B9, 0x0314}, 0x1F31}, // 'ι' + combining -> 'ἱ' - {{0x03B9, 0x0342}, 0x1FD6}, // 'ι' + combining -> 'ῖ' - {{0x03BF, 0x0300}, 0x1F78}, // 'ο' + combining -> 'ὸ' - {{0x03BF, 0x0301}, 0x03CC}, // 'ο' + combining -> 'ό' - {{0x03BF, 0x0313}, 0x1F40}, // 'ο' + combining -> 'ὀ' - {{0x03BF, 0x0314}, 0x1F41}, // 'ο' + combining -> 'ὁ' - {{0x03C1, 0x0313}, 0x1FE4}, // 'ρ' + combining -> 'ῤ' - {{0x03C1, 0x0314}, 0x1FE5}, // 'ρ' + combining -> 'ῥ' - {{0x03C5, 0x0300}, 0x1F7A}, // 'υ' + combining -> 'ὺ' - {{0x03C5, 0x0301}, 0x03CD}, // 'υ' + combining -> 'ύ' - {{0x03C5, 0x0304}, 0x1FE1}, // 'υ' + combining -> 'ῡ' - {{0x03C5, 0x0306}, 0x1FE0}, // 'υ' + combining -> 'ῠ' - {{0x03C5, 0x0308}, 0x03CB}, // 'υ' + combining -> 'ϋ' - {{0x03C5, 0x0313}, 0x1F50}, // 'υ' + combining -> 'ὐ' - {{0x03C5, 0x0314}, 0x1F51}, // 'υ' + combining -> 'ὑ' - {{0x03C5, 0x0342}, 0x1FE6}, // 'υ' + combining -> 'ῦ' - {{0x03C9, 0x0300}, 0x1F7C}, // 'ω' + combining -> 'ὼ' - {{0x03C9, 0x0301}, 0x03CE}, // 'ω' + combining -> 'ώ' - {{0x03C9, 0x0313}, 0x1F60}, // 'ω' + combining -> 'ὠ' - {{0x03C9, 0x0314}, 0x1F61}, // 'ω' + combining -> 'ὡ' - {{0x03C9, 0x0342}, 0x1FF6}, // 'ω' + combining -> 'ῶ' - {{0x03C9, 0x0345}, 0x1FF3}, // 'ω' + combining -> 'ῳ' - {{0x03CA, 0x0300}, 0x1FD2}, // 'ϊ' + combining -> 'ῒ' - {{0x03CA, 0x0301}, 0x0390}, // 'ϊ' + combining -> 'ΐ' - {{0x03CA, 0x0342}, 0x1FD7}, // 'ϊ' + combining -> 'ῗ' - {{0x03CB, 0x0300}, 0x1FE2}, // 'ϋ' + combining -> 'ῢ' - {{0x03CB, 0x0301}, 0x03B0}, // 'ϋ' + combining -> 'ΰ' - {{0x03CB, 0x0342}, 0x1FE7}, // 'ϋ' + combining -> 'ῧ' - {{0x03CE, 0x0345}, 0x1FF4}, // 'ώ' + combining -> 'ῴ' - {{0x03D2, 0x0301}, 0x03D3}, // 'ϒ' + combining -> 'ϓ' - {{0x03D2, 0x0308}, 0x03D4}, // 'ϒ' + combining -> 'ϔ' - {{0x0406, 0x0308}, 0x0407}, // 'І' + combining -> 'Ї' - {{0x0410, 0x0306}, 0x04D0}, // 'А' + combining -> 'Ӑ' - {{0x0410, 0x0308}, 0x04D2}, // 'А' + combining -> 'Ӓ' - {{0x0413, 0x0301}, 0x0403}, // 'Г' + combining -> 'Ѓ' - {{0x0415, 0x0300}, 0x0400}, // 'Е' + combining -> 'Ѐ' - {{0x0415, 0x0306}, 0x04D6}, // 'Е' + combining -> 'Ӗ' - {{0x0415, 0x0308}, 0x0401}, // 'Е' + combining -> 'Ё' - {{0x0416, 0x0306}, 0x04C1}, // 'Ж' + combining -> 'Ӂ' - {{0x0416, 0x0308}, 0x04DC}, // 'Ж' + combining -> 'Ӝ' - {{0x0417, 0x0308}, 0x04DE}, // 'З' + combining -> 'Ӟ' - {{0x0418, 0x0300}, 0x040D}, // 'И' + combining -> 'Ѝ' - {{0x0418, 0x0304}, 0x04E2}, // 'И' + combining -> 'Ӣ' - {{0x0418, 0x0306}, 0x0419}, // 'И' + combining -> 'Й' - {{0x0418, 0x0308}, 0x04E4}, // 'И' + combining -> 'Ӥ' - {{0x041A, 0x0301}, 0x040C}, // 'К' + combining -> 'Ќ' - {{0x041E, 0x0308}, 0x04E6}, // 'О' + combining -> 'Ӧ' - {{0x0423, 0x0304}, 0x04EE}, // 'У' + combining -> 'Ӯ' - {{0x0423, 0x0306}, 0x040E}, // 'У' + combining -> 'Ў' - {{0x0423, 0x0308}, 0x04F0}, // 'У' + combining -> 'Ӱ' - {{0x0423, 0x030B}, 0x04F2}, // 'У' + combining -> 'Ӳ' - {{0x0427, 0x0308}, 0x04F4}, // 'Ч' + combining -> 'Ӵ' - {{0x042B, 0x0308}, 0x04F8}, // 'Ы' + combining -> 'Ӹ' - {{0x042D, 0x0308}, 0x04EC}, // 'Э' + combining -> 'Ӭ' - {{0x0430, 0x0306}, 0x04D1}, // 'а' + combining -> 'ӑ' - {{0x0430, 0x0308}, 0x04D3}, // 'а' + combining -> 'ӓ' - {{0x0433, 0x0301}, 0x0453}, // 'г' + combining -> 'ѓ' - {{0x0435, 0x0300}, 0x0450}, // 'е' + combining -> 'ѐ' - {{0x0435, 0x0306}, 0x04D7}, // 'е' + combining -> 'ӗ' - {{0x0435, 0x0308}, 0x0451}, // 'е' + combining -> 'ё' - {{0x0436, 0x0306}, 0x04C2}, // 'ж' + combining -> 'ӂ' - {{0x0436, 0x0308}, 0x04DD}, // 'ж' + combining -> 'ӝ' - {{0x0437, 0x0308}, 0x04DF}, // 'з' + combining -> 'ӟ' - {{0x0438, 0x0300}, 0x045D}, // 'и' + combining -> 'ѝ' - {{0x0438, 0x0304}, 0x04E3}, // 'и' + combining -> 'ӣ' - {{0x0438, 0x0306}, 0x0439}, // 'и' + combining -> 'й' - {{0x0438, 0x0308}, 0x04E5}, // 'и' + combining -> 'ӥ' - {{0x043A, 0x0301}, 0x045C}, // 'к' + combining -> 'ќ' - {{0x043E, 0x0308}, 0x04E7}, // 'о' + combining -> 'ӧ' - {{0x0443, 0x0304}, 0x04EF}, // 'у' + combining -> 'ӯ' - {{0x0443, 0x0306}, 0x045E}, // 'у' + combining -> 'ў' - {{0x0443, 0x0308}, 0x04F1}, // 'у' + combining -> 'ӱ' - {{0x0443, 0x030B}, 0x04F3}, // 'у' + combining -> 'ӳ' - {{0x0447, 0x0308}, 0x04F5}, // 'ч' + combining -> 'ӵ' - {{0x044B, 0x0308}, 0x04F9}, // 'ы' + combining -> 'ӹ' - {{0x044D, 0x0308}, 0x04ED}, // 'э' + combining -> 'ӭ' - {{0x0456, 0x0308}, 0x0457}, // 'і' + combining -> 'ї' - {{0x0474, 0x030F}, 0x0476}, // 'Ѵ' + combining -> 'Ѷ' - {{0x0475, 0x030F}, 0x0477}, // 'ѵ' + combining -> 'ѷ' - {{0x04D8, 0x0308}, 0x04DA}, // 'Ә' + combining -> 'Ӛ' - {{0x04D9, 0x0308}, 0x04DB}, // 'ә' + combining -> 'ӛ' - {{0x04E8, 0x0308}, 0x04EA}, // 'Ө' + combining -> 'Ӫ' - {{0x04E9, 0x0308}, 0x04EB}, // 'ө' + combining -> 'ӫ' - {{0x05D0, 0x05B7}, 0xFB2E}, // 'א' + combining -> 'אַ' - {{0x05D0, 0x05B8}, 0xFB2F}, // 'א' + combining -> 'אָ' - {{0x05D0, 0x05BC}, 0xFB30}, // 'א' + combining -> 'אּ' - {{0x05D1, 0x05BC}, 0xFB31}, // 'ב' + combining -> 'בּ' - {{0x05D1, 0x05BF}, 0xFB4C}, // 'ב' + combining -> 'בֿ' - {{0x05D2, 0x05BC}, 0xFB32}, // 'ג' + combining -> 'גּ' - {{0x05D3, 0x05BC}, 0xFB33}, // 'ד' + combining -> 'דּ' - {{0x05D4, 0x05BC}, 0xFB34}, // 'ה' + combining -> 'הּ' - {{0x05D5, 0x05B9}, 0xFB4B}, // 'ו' + combining -> 'וֹ' - {{0x05D5, 0x05BC}, 0xFB35}, // 'ו' + combining -> 'וּ' - {{0x05D6, 0x05BC}, 0xFB36}, // 'ז' + combining -> 'זּ' - {{0x05D8, 0x05BC}, 0xFB38}, // 'ט' + combining -> 'טּ' - {{0x05D9, 0x05B4}, 0xFB1D}, // 'י' + combining -> 'יִ' - {{0x05D9, 0x05BC}, 0xFB39}, // 'י' + combining -> 'יּ' - {{0x05DA, 0x05BC}, 0xFB3A}, // 'ך' + combining -> 'ךּ' - {{0x05DB, 0x05BC}, 0xFB3B}, // 'כ' + combining -> 'כּ' - {{0x05DB, 0x05BF}, 0xFB4D}, // 'כ' + combining -> 'כֿ' - {{0x05DC, 0x05BC}, 0xFB3C}, // 'ל' + combining -> 'לּ' - {{0x05DE, 0x05BC}, 0xFB3E}, // 'מ' + combining -> 'מּ' - {{0x05E0, 0x05BC}, 0xFB40}, // 'נ' + combining -> 'נּ' - {{0x05E1, 0x05BC}, 0xFB41}, // 'ס' + combining -> 'סּ' - {{0x05E3, 0x05BC}, 0xFB43}, // 'ף' + combining -> 'ףּ' - {{0x05E4, 0x05BC}, 0xFB44}, // 'פ' + combining -> 'פּ' - {{0x05E4, 0x05BF}, 0xFB4E}, // 'פ' + combining -> 'פֿ' - {{0x05E6, 0x05BC}, 0xFB46}, // 'צ' + combining -> 'צּ' - {{0x05E7, 0x05BC}, 0xFB47}, // 'ק' + combining -> 'קּ' - {{0x05E8, 0x05BC}, 0xFB48}, // 'ר' + combining -> 'רּ' - {{0x05E9, 0x05BC}, 0xFB49}, // 'ש' + combining -> 'שּ' - {{0x05E9, 0x05C1}, 0xFB2A}, // 'ש' + combining -> 'שׁ' - {{0x05E9, 0x05C2}, 0xFB2B}, // 'ש' + combining -> 'שׂ' - {{0x05EA, 0x05BC}, 0xFB4A}, // 'ת' + combining -> 'תּ' - {{0x05F2, 0x05B7}, 0xFB1F}, // 'ײ' + combining -> 'ײַ' - {{0x0627, 0x0653}, 0x0622}, // 'ا' + combining -> 'آ' - {{0x0627, 0x0654}, 0x0623}, // 'ا' + combining -> 'أ' - {{0x0627, 0x0655}, 0x0625}, // 'ا' + combining -> 'إ' - {{0x0648, 0x0654}, 0x0624}, // 'و' + combining -> 'ؤ' - {{0x064A, 0x0654}, 0x0626}, // 'ي' + combining -> 'ئ' - {{0x06C1, 0x0654}, 0x06C2}, // 'ہ' + combining -> 'ۂ' - {{0x06D2, 0x0654}, 0x06D3}, // 'ے' + combining -> 'ۓ' - {{0x06D5, 0x0654}, 0x06C0}, // 'ە' + combining -> 'ۀ' - {{0x0915, 0x093C}, 0x0958}, // 'क' + combining -> 'क़' - {{0x0916, 0x093C}, 0x0959}, // 'ख' + combining -> 'ख़' - {{0x0917, 0x093C}, 0x095A}, // 'ग' + combining -> 'ग़' - {{0x091C, 0x093C}, 0x095B}, // 'ज' + combining -> 'ज़' - {{0x0921, 0x093C}, 0x095C}, // 'ड' + combining -> 'ड़' - {{0x0922, 0x093C}, 0x095D}, // 'ढ' + combining -> 'ढ़' - {{0x0928, 0x093C}, 0x0929}, // 'न' + combining -> 'ऩ' - {{0x092B, 0x093C}, 0x095E}, // 'फ' + combining -> 'फ़' - {{0x092F, 0x093C}, 0x095F}, // 'य' + combining -> 'य़' - {{0x0930, 0x093C}, 0x0931}, // 'र' + combining -> 'ऱ' - {{0x0933, 0x093C}, 0x0934}, // 'ळ' + combining -> 'ऴ' - {{0x09A1, 0x09BC}, 0x09DC}, // 'ড' + combining -> 'ড়' - {{0x09A2, 0x09BC}, 0x09DD}, // 'ঢ' + combining -> 'ঢ়' - {{0x09AF, 0x09BC}, 0x09DF}, // 'য' + combining -> 'য়' - {{0x09C7, 0x09BE}, 0x09CB}, // 'ে' + combining -> 'ো' - {{0x09C7, 0x09D7}, 0x09CC}, // 'ে' + combining -> 'ৌ' - {{0x0A16, 0x0A3C}, 0x0A59}, // 'ਖ' + combining -> 'ਖ਼' - {{0x0A17, 0x0A3C}, 0x0A5A}, // 'ਗ' + combining -> 'ਗ਼' - {{0x0A1C, 0x0A3C}, 0x0A5B}, // 'ਜ' + combining -> 'ਜ਼' - {{0x0A2B, 0x0A3C}, 0x0A5E}, // 'ਫ' + combining -> 'ਫ਼' - {{0x0A32, 0x0A3C}, 0x0A33}, // 'ਲ' + combining -> 'ਲ਼' - {{0x0A38, 0x0A3C}, 0x0A36}, // 'ਸ' + combining -> 'ਸ਼' - {{0x0B21, 0x0B3C}, 0x0B5C}, // 'ଡ' + combining -> 'ଡ଼' - {{0x0B22, 0x0B3C}, 0x0B5D}, // 'ଢ' + combining -> 'ଢ଼' - {{0x0B47, 0x0B3E}, 0x0B4B}, // 'େ' + combining -> 'ୋ' - {{0x0B47, 0x0B56}, 0x0B48}, // 'େ' + combining -> 'ୈ' - {{0x0B47, 0x0B57}, 0x0B4C}, // 'େ' + combining -> 'ୌ' - {{0x0B92, 0x0BD7}, 0x0B94}, // 'ஒ' + combining -> 'ஔ' - {{0x0BC6, 0x0BBE}, 0x0BCA}, // 'ெ' + combining -> 'ொ' - {{0x0BC6, 0x0BD7}, 0x0BCC}, // 'ெ' + combining -> 'ௌ' - {{0x0BC7, 0x0BBE}, 0x0BCB}, // 'ே' + combining -> 'ோ' - {{0x0C46, 0x0C56}, 0x0C48}, // 'ె' + combining -> 'ై' - {{0x0CBF, 0x0CD5}, 0x0CC0}, // 'ಿ' + combining -> 'ೀ' - {{0x0CC6, 0x0CC2}, 0x0CCA}, // 'ೆ' + combining -> 'ೊ' - {{0x0CC6, 0x0CD5}, 0x0CC7}, // 'ೆ' + combining -> 'ೇ' - {{0x0CC6, 0x0CD6}, 0x0CC8}, // 'ೆ' + combining -> 'ೈ' - {{0x0CCA, 0x0CD5}, 0x0CCB}, // 'ೊ' + combining -> 'ೋ' - {{0x0D46, 0x0D3E}, 0x0D4A}, // 'െ' + combining -> 'ൊ' - {{0x0D46, 0x0D57}, 0x0D4C}, // 'െ' + combining -> 'ൌ' - {{0x0D47, 0x0D3E}, 0x0D4B}, // 'േ' + combining -> 'ോ' - {{0x0DD9, 0x0DCA}, 0x0DDA}, // 'ෙ' + combining -> 'ේ' - {{0x0DD9, 0x0DCF}, 0x0DDC}, // 'ෙ' + combining -> 'ො' - {{0x0DD9, 0x0DDF}, 0x0DDE}, // 'ෙ' + combining -> 'ෞ' - {{0x0DDC, 0x0DCA}, 0x0DDD}, // 'ො' + combining -> 'ෝ' - {{0x0F40, 0x0FB5}, 0x0F69}, // 'ཀ' + combining -> 'ཀྵ' - {{0x0F42, 0x0FB7}, 0x0F43}, // 'ག' + combining -> 'གྷ' - {{0x0F4C, 0x0FB7}, 0x0F4D}, // 'ཌ' + combining -> 'ཌྷ' - {{0x0F51, 0x0FB7}, 0x0F52}, // 'ད' + combining -> 'དྷ' - {{0x0F56, 0x0FB7}, 0x0F57}, // 'བ' + combining -> 'བྷ' - {{0x0F5B, 0x0FB7}, 0x0F5C}, // 'ཛ' + combining -> 'ཛྷ' - {{0x0F71, 0x0F72}, 0x0F73}, // 'ཱ' + combining -> 'ཱི' - {{0x0F71, 0x0F74}, 0x0F75}, // 'ཱ' + combining -> 'ཱུ' - {{0x0F71, 0x0F80}, 0x0F81}, // 'ཱ' + combining -> 'ཱྀ' - {{0x0F90, 0x0FB5}, 0x0FB9}, // 'ྐ' + combining -> 'ྐྵ' - {{0x0F92, 0x0FB7}, 0x0F93}, // 'ྒ' + combining -> 'ྒྷ' - {{0x0F9C, 0x0FB7}, 0x0F9D}, // 'ྜ' + combining -> 'ྜྷ' - {{0x0FA1, 0x0FB7}, 0x0FA2}, // 'ྡ' + combining -> 'ྡྷ' - {{0x0FA6, 0x0FB7}, 0x0FA7}, // 'ྦ' + combining -> 'ྦྷ' - {{0x0FAB, 0x0FB7}, 0x0FAC}, // 'ྫ' + combining -> 'ྫྷ' - {{0x0FB2, 0x0F80}, 0x0F76}, // 'ྲ' + combining -> 'ྲྀ' - {{0x0FB3, 0x0F80}, 0x0F78}, // 'ླ' + combining -> 'ླྀ' - {{0x1025, 0x102E}, 0x1026}, // 'ဥ' + combining -> 'ဦ' - {{0x1B05, 0x1B35}, 0x1B06}, // 'ᬅ' + combining -> 'ᬆ' - {{0x1B07, 0x1B35}, 0x1B08}, // 'ᬇ' + combining -> 'ᬈ' - {{0x1B09, 0x1B35}, 0x1B0A}, // 'ᬉ' + combining -> 'ᬊ' - {{0x1B0B, 0x1B35}, 0x1B0C}, // 'ᬋ' + combining -> 'ᬌ' - {{0x1B0D, 0x1B35}, 0x1B0E}, // 'ᬍ' + combining -> 'ᬎ' - {{0x1B11, 0x1B35}, 0x1B12}, // 'ᬑ' + combining -> 'ᬒ' - {{0x1B3A, 0x1B35}, 0x1B3B}, // 'ᬺ' + combining -> 'ᬻ' - {{0x1B3C, 0x1B35}, 0x1B3D}, // 'ᬼ' + combining -> 'ᬽ' - {{0x1B3E, 0x1B35}, 0x1B40}, // 'ᬾ' + combining -> 'ᭀ' - {{0x1B3F, 0x1B35}, 0x1B41}, // 'ᬿ' + combining -> 'ᭁ' - {{0x1B42, 0x1B35}, 0x1B43}, // 'ᭂ' + combining -> 'ᭃ' - {{0x1E36, 0x0304}, 0x1E38}, // 'Ḷ' + combining -> 'Ḹ' - {{0x1E37, 0x0304}, 0x1E39}, // 'ḷ' + combining -> 'ḹ' - {{0x1E5A, 0x0304}, 0x1E5C}, // 'Ṛ' + combining -> 'Ṝ' - {{0x1E5B, 0x0304}, 0x1E5D}, // 'ṛ' + combining -> 'ṝ' - {{0x1E62, 0x0307}, 0x1E68}, // 'Ṣ' + combining -> 'Ṩ' - {{0x1E63, 0x0307}, 0x1E69}, // 'ṣ' + combining -> 'ṩ' - {{0x1EA0, 0x0302}, 0x1EAC}, // 'Ạ' + combining -> 'Ậ' - {{0x1EA0, 0x0306}, 0x1EB6}, // 'Ạ' + combining -> 'Ặ' - {{0x1EA1, 0x0302}, 0x1EAD}, // 'ạ' + combining -> 'ậ' - {{0x1EA1, 0x0306}, 0x1EB7}, // 'ạ' + combining -> 'ặ' - {{0x1EB8, 0x0302}, 0x1EC6}, // 'Ẹ' + combining -> 'Ệ' - {{0x1EB9, 0x0302}, 0x1EC7}, // 'ẹ' + combining -> 'ệ' - {{0x1ECC, 0x0302}, 0x1ED8}, // 'Ọ' + combining -> 'Ộ' - {{0x1ECD, 0x0302}, 0x1ED9}, // 'ọ' + combining -> 'ộ' - {{0x1F00, 0x0300}, 0x1F02}, // 'ἀ' + combining -> 'ἂ' - {{0x1F00, 0x0301}, 0x1F04}, // 'ἀ' + combining -> 'ἄ' - {{0x1F00, 0x0342}, 0x1F06}, // 'ἀ' + combining -> 'ἆ' - {{0x1F00, 0x0345}, 0x1F80}, // 'ἀ' + combining -> 'ᾀ' - {{0x1F01, 0x0300}, 0x1F03}, // 'ἁ' + combining -> 'ἃ' - {{0x1F01, 0x0301}, 0x1F05}, // 'ἁ' + combining -> 'ἅ' - {{0x1F01, 0x0342}, 0x1F07}, // 'ἁ' + combining -> 'ἇ' - {{0x1F01, 0x0345}, 0x1F81}, // 'ἁ' + combining -> 'ᾁ' - {{0x1F02, 0x0345}, 0x1F82}, // 'ἂ' + combining -> 'ᾂ' - {{0x1F03, 0x0345}, 0x1F83}, // 'ἃ' + combining -> 'ᾃ' - {{0x1F04, 0x0345}, 0x1F84}, // 'ἄ' + combining -> 'ᾄ' - {{0x1F05, 0x0345}, 0x1F85}, // 'ἅ' + combining -> 'ᾅ' - {{0x1F06, 0x0345}, 0x1F86}, // 'ἆ' + combining -> 'ᾆ' - {{0x1F07, 0x0345}, 0x1F87}, // 'ἇ' + combining -> 'ᾇ' - {{0x1F08, 0x0300}, 0x1F0A}, // 'Ἀ' + combining -> 'Ἂ' - {{0x1F08, 0x0301}, 0x1F0C}, // 'Ἀ' + combining -> 'Ἄ' - {{0x1F08, 0x0342}, 0x1F0E}, // 'Ἀ' + combining -> 'Ἆ' - {{0x1F08, 0x0345}, 0x1F88}, // 'Ἀ' + combining -> 'ᾈ' - {{0x1F09, 0x0300}, 0x1F0B}, // 'Ἁ' + combining -> 'Ἃ' - {{0x1F09, 0x0301}, 0x1F0D}, // 'Ἁ' + combining -> 'Ἅ' - {{0x1F09, 0x0342}, 0x1F0F}, // 'Ἁ' + combining -> 'Ἇ' - {{0x1F09, 0x0345}, 0x1F89}, // 'Ἁ' + combining -> 'ᾉ' - {{0x1F0A, 0x0345}, 0x1F8A}, // 'Ἂ' + combining -> 'ᾊ' - {{0x1F0B, 0x0345}, 0x1F8B}, // 'Ἃ' + combining -> 'ᾋ' - {{0x1F0C, 0x0345}, 0x1F8C}, // 'Ἄ' + combining -> 'ᾌ' - {{0x1F0D, 0x0345}, 0x1F8D}, // 'Ἅ' + combining -> 'ᾍ' - {{0x1F0E, 0x0345}, 0x1F8E}, // 'Ἆ' + combining -> 'ᾎ' - {{0x1F0F, 0x0345}, 0x1F8F}, // 'Ἇ' + combining -> 'ᾏ' - {{0x1F10, 0x0300}, 0x1F12}, // 'ἐ' + combining -> 'ἒ' - {{0x1F10, 0x0301}, 0x1F14}, // 'ἐ' + combining -> 'ἔ' - {{0x1F11, 0x0300}, 0x1F13}, // 'ἑ' + combining -> 'ἓ' - {{0x1F11, 0x0301}, 0x1F15}, // 'ἑ' + combining -> 'ἕ' - {{0x1F18, 0x0300}, 0x1F1A}, // 'Ἐ' + combining -> 'Ἒ' - {{0x1F18, 0x0301}, 0x1F1C}, // 'Ἐ' + combining -> 'Ἔ' - {{0x1F19, 0x0300}, 0x1F1B}, // 'Ἑ' + combining -> 'Ἓ' - {{0x1F19, 0x0301}, 0x1F1D}, // 'Ἑ' + combining -> 'Ἕ' - {{0x1F20, 0x0300}, 0x1F22}, // 'ἠ' + combining -> 'ἢ' - {{0x1F20, 0x0301}, 0x1F24}, // 'ἠ' + combining -> 'ἤ' - {{0x1F20, 0x0342}, 0x1F26}, // 'ἠ' + combining -> 'ἦ' - {{0x1F20, 0x0345}, 0x1F90}, // 'ἠ' + combining -> 'ᾐ' - {{0x1F21, 0x0300}, 0x1F23}, // 'ἡ' + combining -> 'ἣ' - {{0x1F21, 0x0301}, 0x1F25}, // 'ἡ' + combining -> 'ἥ' - {{0x1F21, 0x0342}, 0x1F27}, // 'ἡ' + combining -> 'ἧ' - {{0x1F21, 0x0345}, 0x1F91}, // 'ἡ' + combining -> 'ᾑ' - {{0x1F22, 0x0345}, 0x1F92}, // 'ἢ' + combining -> 'ᾒ' - {{0x1F23, 0x0345}, 0x1F93}, // 'ἣ' + combining -> 'ᾓ' - {{0x1F24, 0x0345}, 0x1F94}, // 'ἤ' + combining -> 'ᾔ' - {{0x1F25, 0x0345}, 0x1F95}, // 'ἥ' + combining -> 'ᾕ' - {{0x1F26, 0x0345}, 0x1F96}, // 'ἦ' + combining -> 'ᾖ' - {{0x1F27, 0x0345}, 0x1F97}, // 'ἧ' + combining -> 'ᾗ' - {{0x1F28, 0x0300}, 0x1F2A}, // 'Ἠ' + combining -> 'Ἢ' - {{0x1F28, 0x0301}, 0x1F2C}, // 'Ἠ' + combining -> 'Ἤ' - {{0x1F28, 0x0342}, 0x1F2E}, // 'Ἠ' + combining -> 'Ἦ' - {{0x1F28, 0x0345}, 0x1F98}, // 'Ἠ' + combining -> 'ᾘ' - {{0x1F29, 0x0300}, 0x1F2B}, // 'Ἡ' + combining -> 'Ἣ' - {{0x1F29, 0x0301}, 0x1F2D}, // 'Ἡ' + combining -> 'Ἥ' - {{0x1F29, 0x0342}, 0x1F2F}, // 'Ἡ' + combining -> 'Ἧ' - {{0x1F29, 0x0345}, 0x1F99}, // 'Ἡ' + combining -> 'ᾙ' - {{0x1F2A, 0x0345}, 0x1F9A}, // 'Ἢ' + combining -> 'ᾚ' - {{0x1F2B, 0x0345}, 0x1F9B}, // 'Ἣ' + combining -> 'ᾛ' - {{0x1F2C, 0x0345}, 0x1F9C}, // 'Ἤ' + combining -> 'ᾜ' - {{0x1F2D, 0x0345}, 0x1F9D}, // 'Ἥ' + combining -> 'ᾝ' - {{0x1F2E, 0x0345}, 0x1F9E}, // 'Ἦ' + combining -> 'ᾞ' - {{0x1F2F, 0x0345}, 0x1F9F}, // 'Ἧ' + combining -> 'ᾟ' - {{0x1F30, 0x0300}, 0x1F32}, // 'ἰ' + combining -> 'ἲ' - {{0x1F30, 0x0301}, 0x1F34}, // 'ἰ' + combining -> 'ἴ' - {{0x1F30, 0x0342}, 0x1F36}, // 'ἰ' + combining -> 'ἶ' - {{0x1F31, 0x0300}, 0x1F33}, // 'ἱ' + combining -> 'ἳ' - {{0x1F31, 0x0301}, 0x1F35}, // 'ἱ' + combining -> 'ἵ' - {{0x1F31, 0x0342}, 0x1F37}, // 'ἱ' + combining -> 'ἷ' - {{0x1F38, 0x0300}, 0x1F3A}, // 'Ἰ' + combining -> 'Ἲ' - {{0x1F38, 0x0301}, 0x1F3C}, // 'Ἰ' + combining -> 'Ἴ' - {{0x1F38, 0x0342}, 0x1F3E}, // 'Ἰ' + combining -> 'Ἶ' - {{0x1F39, 0x0300}, 0x1F3B}, // 'Ἱ' + combining -> 'Ἳ' - {{0x1F39, 0x0301}, 0x1F3D}, // 'Ἱ' + combining -> 'Ἵ' - {{0x1F39, 0x0342}, 0x1F3F}, // 'Ἱ' + combining -> 'Ἷ' - {{0x1F40, 0x0300}, 0x1F42}, // 'ὀ' + combining -> 'ὂ' - {{0x1F40, 0x0301}, 0x1F44}, // 'ὀ' + combining -> 'ὄ' - {{0x1F41, 0x0300}, 0x1F43}, // 'ὁ' + combining -> 'ὃ' - {{0x1F41, 0x0301}, 0x1F45}, // 'ὁ' + combining -> 'ὅ' - {{0x1F48, 0x0300}, 0x1F4A}, // 'Ὀ' + combining -> 'Ὂ' - {{0x1F48, 0x0301}, 0x1F4C}, // 'Ὀ' + combining -> 'Ὄ' - {{0x1F49, 0x0300}, 0x1F4B}, // 'Ὁ' + combining -> 'Ὃ' - {{0x1F49, 0x0301}, 0x1F4D}, // 'Ὁ' + combining -> 'Ὅ' - {{0x1F50, 0x0300}, 0x1F52}, // 'ὐ' + combining -> 'ὒ' - {{0x1F50, 0x0301}, 0x1F54}, // 'ὐ' + combining -> 'ὔ' - {{0x1F50, 0x0342}, 0x1F56}, // 'ὐ' + combining -> 'ὖ' - {{0x1F51, 0x0300}, 0x1F53}, // 'ὑ' + combining -> 'ὓ' - {{0x1F51, 0x0301}, 0x1F55}, // 'ὑ' + combining -> 'ὕ' - {{0x1F51, 0x0342}, 0x1F57}, // 'ὑ' + combining -> 'ὗ' - {{0x1F59, 0x0300}, 0x1F5B}, // 'Ὑ' + combining -> 'Ὓ' - {{0x1F59, 0x0301}, 0x1F5D}, // 'Ὑ' + combining -> 'Ὕ' - {{0x1F59, 0x0342}, 0x1F5F}, // 'Ὑ' + combining -> 'Ὗ' - {{0x1F60, 0x0300}, 0x1F62}, // 'ὠ' + combining -> 'ὢ' - {{0x1F60, 0x0301}, 0x1F64}, // 'ὠ' + combining -> 'ὤ' - {{0x1F60, 0x0342}, 0x1F66}, // 'ὠ' + combining -> 'ὦ' - {{0x1F60, 0x0345}, 0x1FA0}, // 'ὠ' + combining -> 'ᾠ' - {{0x1F61, 0x0300}, 0x1F63}, // 'ὡ' + combining -> 'ὣ' - {{0x1F61, 0x0301}, 0x1F65}, // 'ὡ' + combining -> 'ὥ' - {{0x1F61, 0x0342}, 0x1F67}, // 'ὡ' + combining -> 'ὧ' - {{0x1F61, 0x0345}, 0x1FA1}, // 'ὡ' + combining -> 'ᾡ' - {{0x1F62, 0x0345}, 0x1FA2}, // 'ὢ' + combining -> 'ᾢ' - {{0x1F63, 0x0345}, 0x1FA3}, // 'ὣ' + combining -> 'ᾣ' - {{0x1F64, 0x0345}, 0x1FA4}, // 'ὤ' + combining -> 'ᾤ' - {{0x1F65, 0x0345}, 0x1FA5}, // 'ὥ' + combining -> 'ᾥ' - {{0x1F66, 0x0345}, 0x1FA6}, // 'ὦ' + combining -> 'ᾦ' - {{0x1F67, 0x0345}, 0x1FA7}, // 'ὧ' + combining -> 'ᾧ' - {{0x1F68, 0x0300}, 0x1F6A}, // 'Ὠ' + combining -> 'Ὢ' - {{0x1F68, 0x0301}, 0x1F6C}, // 'Ὠ' + combining -> 'Ὤ' - {{0x1F68, 0x0342}, 0x1F6E}, // 'Ὠ' + combining -> 'Ὦ' - {{0x1F68, 0x0345}, 0x1FA8}, // 'Ὠ' + combining -> 'ᾨ' - {{0x1F69, 0x0300}, 0x1F6B}, // 'Ὡ' + combining -> 'Ὣ' - {{0x1F69, 0x0301}, 0x1F6D}, // 'Ὡ' + combining -> 'Ὥ' - {{0x1F69, 0x0342}, 0x1F6F}, // 'Ὡ' + combining -> 'Ὧ' - {{0x1F69, 0x0345}, 0x1FA9}, // 'Ὡ' + combining -> 'ᾩ' - {{0x1F6A, 0x0345}, 0x1FAA}, // 'Ὢ' + combining -> 'ᾪ' - {{0x1F6B, 0x0345}, 0x1FAB}, // 'Ὣ' + combining -> 'ᾫ' - {{0x1F6C, 0x0345}, 0x1FAC}, // 'Ὤ' + combining -> 'ᾬ' - {{0x1F6D, 0x0345}, 0x1FAD}, // 'Ὥ' + combining -> 'ᾭ' - {{0x1F6E, 0x0345}, 0x1FAE}, // 'Ὦ' + combining -> 'ᾮ' - {{0x1F6F, 0x0345}, 0x1FAF}, // 'Ὧ' + combining -> 'ᾯ' - {{0x1F70, 0x0345}, 0x1FB2}, // 'ὰ' + combining -> 'ᾲ' - {{0x1F74, 0x0345}, 0x1FC2}, // 'ὴ' + combining -> 'ῂ' - {{0x1F7C, 0x0345}, 0x1FF2}, // 'ὼ' + combining -> 'ῲ' - {{0x1FB6, 0x0345}, 0x1FB7}, // 'ᾶ' + combining -> 'ᾷ' - {{0x1FBF, 0x0300}, 0x1FCD}, // '᾿' + combining -> '῍' - {{0x1FBF, 0x0301}, 0x1FCE}, // '᾿' + combining -> '῎' - {{0x1FBF, 0x0342}, 0x1FCF}, // '᾿' + combining -> '῏' - {{0x1FC6, 0x0345}, 0x1FC7}, // 'ῆ' + combining -> 'ῇ' - {{0x1FF6, 0x0345}, 0x1FF7}, // 'ῶ' + combining -> 'ῷ' - {{0x1FFE, 0x0300}, 0x1FDD}, // '῾' + combining -> '῝' - {{0x1FFE, 0x0301}, 0x1FDE}, // '῾' + combining -> '῞' - {{0x1FFE, 0x0342}, 0x1FDF}, // '῾' + combining -> '῟' - {{0x2190, 0x0338}, 0x219A}, // '←' + combining -> '↚' - {{0x2192, 0x0338}, 0x219B}, // '→' + combining -> '↛' - {{0x2194, 0x0338}, 0x21AE}, // '↔' + combining -> '↮' - {{0x21D0, 0x0338}, 0x21CD}, // '⇐' + combining -> '⇍' - {{0x21D2, 0x0338}, 0x21CF}, // '⇒' + combining -> '⇏' - {{0x21D4, 0x0338}, 0x21CE}, // '⇔' + combining -> '⇎' - {{0x2203, 0x0338}, 0x2204}, // '∃' + combining -> '∄' - {{0x2208, 0x0338}, 0x2209}, // '∈' + combining -> '∉' - {{0x220B, 0x0338}, 0x220C}, // '∋' + combining -> '∌' - {{0x2223, 0x0338}, 0x2224}, // '∣' + combining -> '∤' - {{0x2225, 0x0338}, 0x2226}, // '∥' + combining -> '∦' - {{0x223C, 0x0338}, 0x2241}, // '∼' + combining -> '≁' - {{0x2243, 0x0338}, 0x2244}, // '≃' + combining -> '≄' - {{0x2245, 0x0338}, 0x2247}, // '≅' + combining -> '≇' - {{0x2248, 0x0338}, 0x2249}, // '≈' + combining -> '≉' - {{0x224D, 0x0338}, 0x226D}, // '≍' + combining -> '≭' - {{0x2261, 0x0338}, 0x2262}, // '≡' + combining -> '≢' - {{0x2264, 0x0338}, 0x2270}, // '≤' + combining -> '≰' - {{0x2265, 0x0338}, 0x2271}, // '≥' + combining -> '≱' - {{0x2272, 0x0338}, 0x2274}, // '≲' + combining -> '≴' - {{0x2273, 0x0338}, 0x2275}, // '≳' + combining -> '≵' - {{0x2276, 0x0338}, 0x2278}, // '≶' + combining -> '≸' - {{0x2277, 0x0338}, 0x2279}, // '≷' + combining -> '≹' - {{0x227A, 0x0338}, 0x2280}, // '≺' + combining -> '⊀' - {{0x227B, 0x0338}, 0x2281}, // '≻' + combining -> '⊁' - {{0x227C, 0x0338}, 0x22E0}, // '≼' + combining -> '⋠' - {{0x227D, 0x0338}, 0x22E1}, // '≽' + combining -> '⋡' - {{0x2282, 0x0338}, 0x2284}, // '⊂' + combining -> '⊄' - {{0x2283, 0x0338}, 0x2285}, // '⊃' + combining -> '⊅' - {{0x2286, 0x0338}, 0x2288}, // '⊆' + combining -> '⊈' - {{0x2287, 0x0338}, 0x2289}, // '⊇' + combining -> '⊉' - {{0x2291, 0x0338}, 0x22E2}, // '⊑' + combining -> '⋢' - {{0x2292, 0x0338}, 0x22E3}, // '⊒' + combining -> '⋣' - {{0x22A2, 0x0338}, 0x22AC}, // '⊢' + combining -> '⊬' - {{0x22A8, 0x0338}, 0x22AD}, // '⊨' + combining -> '⊭' - {{0x22A9, 0x0338}, 0x22AE}, // '⊩' + combining -> '⊮' - {{0x22AB, 0x0338}, 0x22AF}, // '⊫' + combining -> '⊯' - {{0x22B2, 0x0338}, 0x22EA}, // '⊲' + combining -> '⋪' - {{0x22B3, 0x0338}, 0x22EB}, // '⊳' + combining -> '⋫' - {{0x22B4, 0x0338}, 0x22EC}, // '⊴' + combining -> '⋬' - {{0x22B5, 0x0338}, 0x22ED}, // '⊵' + combining -> '⋭' - {{0x2ADD, 0x0338}, 0x2ADC}, // '⫝' + combining -> '⫝̸' - {{0x3046, 0x3099}, 0x3094}, // 'う' + combining -> 'ゔ' - {{0x304B, 0x3099}, 0x304C}, // 'か' + combining -> 'が' - {{0x304D, 0x3099}, 0x304E}, // 'き' + combining -> 'ぎ' - {{0x304F, 0x3099}, 0x3050}, // 'く' + combining -> 'ぐ' - {{0x3051, 0x3099}, 0x3052}, // 'け' + combining -> 'げ' - {{0x3053, 0x3099}, 0x3054}, // 'こ' + combining -> 'ご' - {{0x3055, 0x3099}, 0x3056}, // 'さ' + combining -> 'ざ' - {{0x3057, 0x3099}, 0x3058}, // 'し' + combining -> 'じ' - {{0x3059, 0x3099}, 0x305A}, // 'す' + combining -> 'ず' - {{0x305B, 0x3099}, 0x305C}, // 'せ' + combining -> 'ぜ' - {{0x305D, 0x3099}, 0x305E}, // 'そ' + combining -> 'ぞ' - {{0x305F, 0x3099}, 0x3060}, // 'た' + combining -> 'だ' - {{0x3061, 0x3099}, 0x3062}, // 'ち' + combining -> 'ぢ' - {{0x3064, 0x3099}, 0x3065}, // 'つ' + combining -> 'づ' - {{0x3066, 0x3099}, 0x3067}, // 'て' + combining -> 'で' - {{0x3068, 0x3099}, 0x3069}, // 'と' + combining -> 'ど' - {{0x306F, 0x3099}, 0x3070}, // 'は' + combining -> 'ば' - {{0x306F, 0x309A}, 0x3071}, // 'は' + combining -> 'ぱ' - {{0x3072, 0x3099}, 0x3073}, // 'ひ' + combining -> 'び' - {{0x3072, 0x309A}, 0x3074}, // 'ひ' + combining -> 'ぴ' - {{0x3075, 0x3099}, 0x3076}, // 'ふ' + combining -> 'ぶ' - {{0x3075, 0x309A}, 0x3077}, // 'ふ' + combining -> 'ぷ' - {{0x3078, 0x3099}, 0x3079}, // 'へ' + combining -> 'べ' - {{0x3078, 0x309A}, 0x307A}, // 'へ' + combining -> 'ぺ' - {{0x307B, 0x3099}, 0x307C}, // 'ほ' + combining -> 'ぼ' - {{0x307B, 0x309A}, 0x307D}, // 'ほ' + combining -> 'ぽ' - {{0x309D, 0x3099}, 0x309E}, // 'ゝ' + combining -> 'ゞ' - {{0x30A6, 0x3099}, 0x30F4}, // 'ウ' + combining -> 'ヴ' - {{0x30AB, 0x3099}, 0x30AC}, // 'カ' + combining -> 'ガ' - {{0x30AD, 0x3099}, 0x30AE}, // 'キ' + combining -> 'ギ' - {{0x30AF, 0x3099}, 0x30B0}, // 'ク' + combining -> 'グ' - {{0x30B1, 0x3099}, 0x30B2}, // 'ケ' + combining -> 'ゲ' - {{0x30B3, 0x3099}, 0x30B4}, // 'コ' + combining -> 'ゴ' - {{0x30B5, 0x3099}, 0x30B6}, // 'サ' + combining -> 'ザ' - {{0x30B7, 0x3099}, 0x30B8}, // 'シ' + combining -> 'ジ' - {{0x30B9, 0x3099}, 0x30BA}, // 'ス' + combining -> 'ズ' - {{0x30BB, 0x3099}, 0x30BC}, // 'セ' + combining -> 'ゼ' - {{0x30BD, 0x3099}, 0x30BE}, // 'ソ' + combining -> 'ゾ' - {{0x30BF, 0x3099}, 0x30C0}, // 'タ' + combining -> 'ダ' - {{0x30C1, 0x3099}, 0x30C2}, // 'チ' + combining -> 'ヂ' - {{0x30C4, 0x3099}, 0x30C5}, // 'ツ' + combining -> 'ヅ' - {{0x30C6, 0x3099}, 0x30C7}, // 'テ' + combining -> 'デ' - {{0x30C8, 0x3099}, 0x30C9}, // 'ト' + combining -> 'ド' - {{0x30CF, 0x3099}, 0x30D0}, // 'ハ' + combining -> 'バ' - {{0x30CF, 0x309A}, 0x30D1}, // 'ハ' + combining -> 'パ' - {{0x30D2, 0x3099}, 0x30D3}, // 'ヒ' + combining -> 'ビ' - {{0x30D2, 0x309A}, 0x30D4}, // 'ヒ' + combining -> 'ピ' - {{0x30D5, 0x3099}, 0x30D6}, // 'フ' + combining -> 'ブ' - {{0x30D5, 0x309A}, 0x30D7}, // 'フ' + combining -> 'プ' - {{0x30D8, 0x3099}, 0x30D9}, // 'ヘ' + combining -> 'ベ' - {{0x30D8, 0x309A}, 0x30DA}, // 'ヘ' + combining -> 'ペ' - {{0x30DB, 0x3099}, 0x30DC}, // 'ホ' + combining -> 'ボ' - {{0x30DB, 0x309A}, 0x30DD}, // 'ホ' + combining -> 'ポ' - {{0x30EF, 0x3099}, 0x30F7}, // 'ワ' + combining -> 'ヷ' - {{0x30F0, 0x3099}, 0x30F8}, // 'ヰ' + combining -> 'ヸ' - {{0x30F1, 0x3099}, 0x30F9}, // 'ヱ' + combining -> 'ヹ' - {{0x30F2, 0x3099}, 0x30FA}, // 'ヲ' + combining -> 'ヺ' - {{0x30FD, 0x3099}, 0x30FE}, // 'ヽ' + combining -> 'ヾ' - {{0xFB49, 0x05C1}, 0xFB2C}, // 'שּ' + combining -> 'שּׁ' - {{0xFB49, 0x05C2}, 0xFB2D}, // 'שּ' + combining -> 'שּׂ' - {{0x105D2, 0x0307}, 0x105C9}, - {{0x105DA, 0x0307}, 0x105E4}, - {{0x11099, 0x110BA}, 0x1109A}, // '𑂙' + combining -> '𑂚' - {{0x1109B, 0x110BA}, 0x1109C}, // '𑂛' + combining -> '𑂜' - {{0x110A5, 0x110BA}, 0x110AB}, // '𑂥' + combining -> '𑂫' - {{0x11131, 0x11127}, 0x1112E}, // '𑄱' + combining -> '𑄮' - {{0x11132, 0x11127}, 0x1112F}, // '𑄲' + combining -> '𑄯' - {{0x11347, 0x1133E}, 0x1134B}, // '𑍇' + combining -> '𑍋' - {{0x11347, 0x11357}, 0x1134C}, // '𑍇' + combining -> '𑍌' - {{0x11382, 0x113C9}, 0x11383}, - {{0x11384, 0x113BB}, 0x11385}, - {{0x1138B, 0x113C2}, 0x1138E}, - {{0x11390, 0x113C9}, 0x11391}, - {{0x113C2, 0x113B8}, 0x113C7}, - {{0x113C2, 0x113C2}, 0x113C5}, - {{0x113C2, 0x113C9}, 0x113C8}, - {{0x114B9, 0x114B0}, 0x114BC}, // '𑒹' + combining -> '𑒼' - {{0x114B9, 0x114BA}, 0x114BB}, // '𑒹' + combining -> '𑒻' - {{0x114B9, 0x114BD}, 0x114BE}, // '𑒹' + combining -> '𑒾' - {{0x115B8, 0x115AF}, 0x115BA}, // '𑖸' + combining -> '𑖺' - {{0x115B9, 0x115AF}, 0x115BB}, // '𑖹' + combining -> '𑖻' - {{0x11935, 0x11930}, 0x11938}, // '𑤵' + combining -> '𑤸' - {{0x1611E, 0x1611E}, 0x16121}, - {{0x1611E, 0x1611F}, 0x16123}, - {{0x1611E, 0x16120}, 0x16125}, - {{0x1611E, 0x16129}, 0x16122}, - {{0x16121, 0x1611F}, 0x16126}, - {{0x16121, 0x16120}, 0x16128}, - {{0x16122, 0x1611F}, 0x16127}, - {{0x16129, 0x1611F}, 0x16124}, - {{0x16D63, 0x16D67}, 0x16D69}, - {{0x16D67, 0x16D67}, 0x16D68}, - {{0x16D69, 0x16D67}, 0x16D6A}, - {{0x1D157, 0x1D165}, 0x1D15E}, // '𝅗' + combining -> '𝅗𝅥' - {{0x1D158, 0x1D165}, 0x1D15F}, // '𝅘' + combining -> '𝅘𝅥' - {{0x1D15F, 0x1D16E}, 0x1D160}, // '𝅘𝅥' + combining -> '𝅘𝅥𝅮' - {{0x1D15F, 0x1D16F}, 0x1D161}, // '𝅘𝅥' + combining -> '𝅘𝅥𝅯' - {{0x1D15F, 0x1D170}, 0x1D162}, // '𝅘𝅥' + combining -> '𝅘𝅥𝅰' - {{0x1D15F, 0x1D171}, 0x1D163}, // '𝅘𝅥' + combining -> '𝅘𝅥𝅱' - {{0x1D15F, 0x1D172}, 0x1D164}, // '𝅘𝅥' + combining -> '𝅘𝅥𝅲' - {{0x1D1B9, 0x1D165}, 0x1D1BB}, // '𝆹' + combining -> '𝆹𝅥' - {{0x1D1BA, 0x1D165}, 0x1D1BC}, // '𝆺' + combining -> '𝆺𝅥' - {{0x1D1BB, 0x1D16E}, 0x1D1BD}, // '𝆹𝅥' + combining -> '𝆹𝅥𝅮' - {{0x1D1BB, 0x1D16F}, 0x1D1BF}, // '𝆹𝅥' + combining -> '𝆹𝅥𝅯' - {{0x1D1BC, 0x1D16E}, 0x1D1BE}, // '𝆺𝅥' + combining -> '𝆺𝅥𝅮' - {{0x1D1BC, 0x1D16F}, 0x1D1C0}, // '𝆺𝅥' + combining -> '𝆺𝅥𝅯' +static const std::unordered_map, uint32_t, pair_hash> nfc_composition_table = { + {{0x003C, 0x0338}, 0x226E}, // '<' + combining -> '≮' + {{0x003D, 0x0338}, 0x2260}, // '=' + combining -> '≠' + {{0x003E, 0x0338}, 0x226F}, // '>' + combining -> '≯' + {{0x0041, 0x0300}, 0x00C0}, // 'A' + combining -> 'À' + {{0x0041, 0x0301}, 0x00C1}, // 'A' + combining -> 'Á' + {{0x0041, 0x0302}, 0x00C2}, // 'A' + combining -> 'Â' + {{0x0041, 0x0303}, 0x00C3}, // 'A' + combining -> 'Ã' + {{0x0041, 0x0304}, 0x0100}, // 'A' + combining -> 'Ā' + {{0x0041, 0x0306}, 0x0102}, // 'A' + combining -> 'Ă' + {{0x0041, 0x0307}, 0x0226}, // 'A' + combining -> 'Ȧ' + {{0x0041, 0x0308}, 0x00C4}, // 'A' + combining -> 'Ä' + {{0x0041, 0x0309}, 0x1EA2}, // 'A' + combining -> 'Ả' + {{0x0041, 0x030A}, 0x00C5}, // 'A' + combining -> 'Å' + {{0x0041, 0x030C}, 0x01CD}, // 'A' + combining -> 'Ǎ' + {{0x0041, 0x030F}, 0x0200}, // 'A' + combining -> 'Ȁ' + {{0x0041, 0x0311}, 0x0202}, // 'A' + combining -> 'Ȃ' + {{0x0041, 0x0323}, 0x1EA0}, // 'A' + combining -> 'Ạ' + {{0x0041, 0x0325}, 0x1E00}, // 'A' + combining -> 'Ḁ' + {{0x0041, 0x0328}, 0x0104}, // 'A' + combining -> 'Ą' + {{0x0042, 0x0307}, 0x1E02}, // 'B' + combining -> 'Ḃ' + {{0x0042, 0x0323}, 0x1E04}, // 'B' + combining -> 'Ḅ' + {{0x0042, 0x0331}, 0x1E06}, // 'B' + combining -> 'Ḇ' + {{0x0043, 0x0301}, 0x0106}, // 'C' + combining -> 'Ć' + {{0x0043, 0x0302}, 0x0108}, // 'C' + combining -> 'Ĉ' + {{0x0043, 0x0307}, 0x010A}, // 'C' + combining -> 'Ċ' + {{0x0043, 0x030C}, 0x010C}, // 'C' + combining -> 'Č' + {{0x0043, 0x0327}, 0x00C7}, // 'C' + combining -> 'Ç' + {{0x0044, 0x0307}, 0x1E0A}, // 'D' + combining -> 'Ḋ' + {{0x0044, 0x030C}, 0x010E}, // 'D' + combining -> 'Ď' + {{0x0044, 0x0323}, 0x1E0C}, // 'D' + combining -> 'Ḍ' + {{0x0044, 0x0327}, 0x1E10}, // 'D' + combining -> 'Ḑ' + {{0x0044, 0x032D}, 0x1E12}, // 'D' + combining -> 'Ḓ' + {{0x0044, 0x0331}, 0x1E0E}, // 'D' + combining -> 'Ḏ' + {{0x0045, 0x0300}, 0x00C8}, // 'E' + combining -> 'È' + {{0x0045, 0x0301}, 0x00C9}, // 'E' + combining -> 'É' + {{0x0045, 0x0302}, 0x00CA}, // 'E' + combining -> 'Ê' + {{0x0045, 0x0303}, 0x1EBC}, // 'E' + combining -> 'Ẽ' + {{0x0045, 0x0304}, 0x0112}, // 'E' + combining -> 'Ē' + {{0x0045, 0x0306}, 0x0114}, // 'E' + combining -> 'Ĕ' + {{0x0045, 0x0307}, 0x0116}, // 'E' + combining -> 'Ė' + {{0x0045, 0x0308}, 0x00CB}, // 'E' + combining -> 'Ë' + {{0x0045, 0x0309}, 0x1EBA}, // 'E' + combining -> 'Ẻ' + {{0x0045, 0x030C}, 0x011A}, // 'E' + combining -> 'Ě' + {{0x0045, 0x030F}, 0x0204}, // 'E' + combining -> 'Ȅ' + {{0x0045, 0x0311}, 0x0206}, // 'E' + combining -> 'Ȇ' + {{0x0045, 0x0323}, 0x1EB8}, // 'E' + combining -> 'Ẹ' + {{0x0045, 0x0327}, 0x0228}, // 'E' + combining -> 'Ȩ' + {{0x0045, 0x0328}, 0x0118}, // 'E' + combining -> 'Ę' + {{0x0045, 0x032D}, 0x1E18}, // 'E' + combining -> 'Ḙ' + {{0x0045, 0x0330}, 0x1E1A}, // 'E' + combining -> 'Ḛ' + {{0x0046, 0x0307}, 0x1E1E}, // 'F' + combining -> 'Ḟ' + {{0x0047, 0x0301}, 0x01F4}, // 'G' + combining -> 'Ǵ' + {{0x0047, 0x0302}, 0x011C}, // 'G' + combining -> 'Ĝ' + {{0x0047, 0x0304}, 0x1E20}, // 'G' + combining -> 'Ḡ' + {{0x0047, 0x0306}, 0x011E}, // 'G' + combining -> 'Ğ' + {{0x0047, 0x0307}, 0x0120}, // 'G' + combining -> 'Ġ' + {{0x0047, 0x030C}, 0x01E6}, // 'G' + combining -> 'Ǧ' + {{0x0047, 0x0327}, 0x0122}, // 'G' + combining -> 'Ģ' + {{0x0048, 0x0302}, 0x0124}, // 'H' + combining -> 'Ĥ' + {{0x0048, 0x0307}, 0x1E22}, // 'H' + combining -> 'Ḣ' + {{0x0048, 0x0308}, 0x1E26}, // 'H' + combining -> 'Ḧ' + {{0x0048, 0x030C}, 0x021E}, // 'H' + combining -> 'Ȟ' + {{0x0048, 0x0323}, 0x1E24}, // 'H' + combining -> 'Ḥ' + {{0x0048, 0x0327}, 0x1E28}, // 'H' + combining -> 'Ḩ' + {{0x0048, 0x032E}, 0x1E2A}, // 'H' + combining -> 'Ḫ' + {{0x0049, 0x0300}, 0x00CC}, // 'I' + combining -> 'Ì' + {{0x0049, 0x0301}, 0x00CD}, // 'I' + combining -> 'Í' + {{0x0049, 0x0302}, 0x00CE}, // 'I' + combining -> 'Î' + {{0x0049, 0x0303}, 0x0128}, // 'I' + combining -> 'Ĩ' + {{0x0049, 0x0304}, 0x012A}, // 'I' + combining -> 'Ī' + {{0x0049, 0x0306}, 0x012C}, // 'I' + combining -> 'Ĭ' + {{0x0049, 0x0307}, 0x0130}, // 'I' + combining -> 'İ' + {{0x0049, 0x0308}, 0x00CF}, // 'I' + combining -> 'Ï' + {{0x0049, 0x0309}, 0x1EC8}, // 'I' + combining -> 'Ỉ' + {{0x0049, 0x030C}, 0x01CF}, // 'I' + combining -> 'Ǐ' + {{0x0049, 0x030F}, 0x0208}, // 'I' + combining -> 'Ȉ' + {{0x0049, 0x0311}, 0x020A}, // 'I' + combining -> 'Ȋ' + {{0x0049, 0x0323}, 0x1ECA}, // 'I' + combining -> 'Ị' + {{0x0049, 0x0328}, 0x012E}, // 'I' + combining -> 'Į' + {{0x0049, 0x0330}, 0x1E2C}, // 'I' + combining -> 'Ḭ' + {{0x004A, 0x0302}, 0x0134}, // 'J' + combining -> 'Ĵ' + {{0x004B, 0x0301}, 0x1E30}, // 'K' + combining -> 'Ḱ' + {{0x004B, 0x030C}, 0x01E8}, // 'K' + combining -> 'Ǩ' + {{0x004B, 0x0323}, 0x1E32}, // 'K' + combining -> 'Ḳ' + {{0x004B, 0x0327}, 0x0136}, // 'K' + combining -> 'Ķ' + {{0x004B, 0x0331}, 0x1E34}, // 'K' + combining -> 'Ḵ' + {{0x004C, 0x0301}, 0x0139}, // 'L' + combining -> 'Ĺ' + {{0x004C, 0x030C}, 0x013D}, // 'L' + combining -> 'Ľ' + {{0x004C, 0x0323}, 0x1E36}, // 'L' + combining -> 'Ḷ' + {{0x004C, 0x0327}, 0x013B}, // 'L' + combining -> 'Ļ' + {{0x004C, 0x032D}, 0x1E3C}, // 'L' + combining -> 'Ḽ' + {{0x004C, 0x0331}, 0x1E3A}, // 'L' + combining -> 'Ḻ' + {{0x004D, 0x0301}, 0x1E3E}, // 'M' + combining -> 'Ḿ' + {{0x004D, 0x0307}, 0x1E40}, // 'M' + combining -> 'Ṁ' + {{0x004D, 0x0323}, 0x1E42}, // 'M' + combining -> 'Ṃ' + {{0x004E, 0x0300}, 0x01F8}, // 'N' + combining -> 'Ǹ' + {{0x004E, 0x0301}, 0x0143}, // 'N' + combining -> 'Ń' + {{0x004E, 0x0303}, 0x00D1}, // 'N' + combining -> 'Ñ' + {{0x004E, 0x0307}, 0x1E44}, // 'N' + combining -> 'Ṅ' + {{0x004E, 0x030C}, 0x0147}, // 'N' + combining -> 'Ň' + {{0x004E, 0x0323}, 0x1E46}, // 'N' + combining -> 'Ṇ' + {{0x004E, 0x0327}, 0x0145}, // 'N' + combining -> 'Ņ' + {{0x004E, 0x032D}, 0x1E4A}, // 'N' + combining -> 'Ṋ' + {{0x004E, 0x0331}, 0x1E48}, // 'N' + combining -> 'Ṉ' + {{0x004F, 0x0300}, 0x00D2}, // 'O' + combining -> 'Ò' + {{0x004F, 0x0301}, 0x00D3}, // 'O' + combining -> 'Ó' + {{0x004F, 0x0302}, 0x00D4}, // 'O' + combining -> 'Ô' + {{0x004F, 0x0303}, 0x00D5}, // 'O' + combining -> 'Õ' + {{0x004F, 0x0304}, 0x014C}, // 'O' + combining -> 'Ō' + {{0x004F, 0x0306}, 0x014E}, // 'O' + combining -> 'Ŏ' + {{0x004F, 0x0307}, 0x022E}, // 'O' + combining -> 'Ȯ' + {{0x004F, 0x0308}, 0x00D6}, // 'O' + combining -> 'Ö' + {{0x004F, 0x0309}, 0x1ECE}, // 'O' + combining -> 'Ỏ' + {{0x004F, 0x030B}, 0x0150}, // 'O' + combining -> 'Ő' + {{0x004F, 0x030C}, 0x01D1}, // 'O' + combining -> 'Ǒ' + {{0x004F, 0x030F}, 0x020C}, // 'O' + combining -> 'Ȍ' + {{0x004F, 0x0311}, 0x020E}, // 'O' + combining -> 'Ȏ' + {{0x004F, 0x031B}, 0x01A0}, // 'O' + combining -> 'Ơ' + {{0x004F, 0x0323}, 0x1ECC}, // 'O' + combining -> 'Ọ' + {{0x004F, 0x0328}, 0x01EA}, // 'O' + combining -> 'Ǫ' + {{0x0050, 0x0301}, 0x1E54}, // 'P' + combining -> 'Ṕ' + {{0x0050, 0x0307}, 0x1E56}, // 'P' + combining -> 'Ṗ' + {{0x0052, 0x0301}, 0x0154}, // 'R' + combining -> 'Ŕ' + {{0x0052, 0x0307}, 0x1E58}, // 'R' + combining -> 'Ṙ' + {{0x0052, 0x030C}, 0x0158}, // 'R' + combining -> 'Ř' + {{0x0052, 0x030F}, 0x0210}, // 'R' + combining -> 'Ȑ' + {{0x0052, 0x0311}, 0x0212}, // 'R' + combining -> 'Ȓ' + {{0x0052, 0x0323}, 0x1E5A}, // 'R' + combining -> 'Ṛ' + {{0x0052, 0x0327}, 0x0156}, // 'R' + combining -> 'Ŗ' + {{0x0052, 0x0331}, 0x1E5E}, // 'R' + combining -> 'Ṟ' + {{0x0053, 0x0301}, 0x015A}, // 'S' + combining -> 'Ś' + {{0x0053, 0x0302}, 0x015C}, // 'S' + combining -> 'Ŝ' + {{0x0053, 0x0307}, 0x1E60}, // 'S' + combining -> 'Ṡ' + {{0x0053, 0x030C}, 0x0160}, // 'S' + combining -> 'Š' + {{0x0053, 0x0323}, 0x1E62}, // 'S' + combining -> 'Ṣ' + {{0x0053, 0x0326}, 0x0218}, // 'S' + combining -> 'Ș' + {{0x0053, 0x0327}, 0x015E}, // 'S' + combining -> 'Ş' + {{0x0054, 0x0307}, 0x1E6A}, // 'T' + combining -> 'Ṫ' + {{0x0054, 0x030C}, 0x0164}, // 'T' + combining -> 'Ť' + {{0x0054, 0x0323}, 0x1E6C}, // 'T' + combining -> 'Ṭ' + {{0x0054, 0x0326}, 0x021A}, // 'T' + combining -> 'Ț' + {{0x0054, 0x0327}, 0x0162}, // 'T' + combining -> 'Ţ' + {{0x0054, 0x032D}, 0x1E70}, // 'T' + combining -> 'Ṱ' + {{0x0054, 0x0331}, 0x1E6E}, // 'T' + combining -> 'Ṯ' + {{0x0055, 0x0300}, 0x00D9}, // 'U' + combining -> 'Ù' + {{0x0055, 0x0301}, 0x00DA}, // 'U' + combining -> 'Ú' + {{0x0055, 0x0302}, 0x00DB}, // 'U' + combining -> 'Û' + {{0x0055, 0x0303}, 0x0168}, // 'U' + combining -> 'Ũ' + {{0x0055, 0x0304}, 0x016A}, // 'U' + combining -> 'Ū' + {{0x0055, 0x0306}, 0x016C}, // 'U' + combining -> 'Ŭ' + {{0x0055, 0x0308}, 0x00DC}, // 'U' + combining -> 'Ü' + {{0x0055, 0x0309}, 0x1EE6}, // 'U' + combining -> 'Ủ' + {{0x0055, 0x030A}, 0x016E}, // 'U' + combining -> 'Ů' + {{0x0055, 0x030B}, 0x0170}, // 'U' + combining -> 'Ű' + {{0x0055, 0x030C}, 0x01D3}, // 'U' + combining -> 'Ǔ' + {{0x0055, 0x030F}, 0x0214}, // 'U' + combining -> 'Ȕ' + {{0x0055, 0x0311}, 0x0216}, // 'U' + combining -> 'Ȗ' + {{0x0055, 0x031B}, 0x01AF}, // 'U' + combining -> 'Ư' + {{0x0055, 0x0323}, 0x1EE4}, // 'U' + combining -> 'Ụ' + {{0x0055, 0x0324}, 0x1E72}, // 'U' + combining -> 'Ṳ' + {{0x0055, 0x0328}, 0x0172}, // 'U' + combining -> 'Ų' + {{0x0055, 0x032D}, 0x1E76}, // 'U' + combining -> 'Ṷ' + {{0x0055, 0x0330}, 0x1E74}, // 'U' + combining -> 'Ṵ' + {{0x0056, 0x0303}, 0x1E7C}, // 'V' + combining -> 'Ṽ' + {{0x0056, 0x0323}, 0x1E7E}, // 'V' + combining -> 'Ṿ' + {{0x0057, 0x0300}, 0x1E80}, // 'W' + combining -> 'Ẁ' + {{0x0057, 0x0301}, 0x1E82}, // 'W' + combining -> 'Ẃ' + {{0x0057, 0x0302}, 0x0174}, // 'W' + combining -> 'Ŵ' + {{0x0057, 0x0307}, 0x1E86}, // 'W' + combining -> 'Ẇ' + {{0x0057, 0x0308}, 0x1E84}, // 'W' + combining -> 'Ẅ' + {{0x0057, 0x0323}, 0x1E88}, // 'W' + combining -> 'Ẉ' + {{0x0058, 0x0307}, 0x1E8A}, // 'X' + combining -> 'Ẋ' + {{0x0058, 0x0308}, 0x1E8C}, // 'X' + combining -> 'Ẍ' + {{0x0059, 0x0300}, 0x1EF2}, // 'Y' + combining -> 'Ỳ' + {{0x0059, 0x0301}, 0x00DD}, // 'Y' + combining -> 'Ý' + {{0x0059, 0x0302}, 0x0176}, // 'Y' + combining -> 'Ŷ' + {{0x0059, 0x0303}, 0x1EF8}, // 'Y' + combining -> 'Ỹ' + {{0x0059, 0x0304}, 0x0232}, // 'Y' + combining -> 'Ȳ' + {{0x0059, 0x0307}, 0x1E8E}, // 'Y' + combining -> 'Ẏ' + {{0x0059, 0x0308}, 0x0178}, // 'Y' + combining -> 'Ÿ' + {{0x0059, 0x0309}, 0x1EF6}, // 'Y' + combining -> 'Ỷ' + {{0x0059, 0x0323}, 0x1EF4}, // 'Y' + combining -> 'Ỵ' + {{0x005A, 0x0301}, 0x0179}, // 'Z' + combining -> 'Ź' + {{0x005A, 0x0302}, 0x1E90}, // 'Z' + combining -> 'Ẑ' + {{0x005A, 0x0307}, 0x017B}, // 'Z' + combining -> 'Ż' + {{0x005A, 0x030C}, 0x017D}, // 'Z' + combining -> 'Ž' + {{0x005A, 0x0323}, 0x1E92}, // 'Z' + combining -> 'Ẓ' + {{0x005A, 0x0331}, 0x1E94}, // 'Z' + combining -> 'Ẕ' + {{0x0061, 0x0300}, 0x00E0}, // 'a' + combining -> 'à' + {{0x0061, 0x0301}, 0x00E1}, // 'a' + combining -> 'á' + {{0x0061, 0x0302}, 0x00E2}, // 'a' + combining -> 'â' + {{0x0061, 0x0303}, 0x00E3}, // 'a' + combining -> 'ã' + {{0x0061, 0x0304}, 0x0101}, // 'a' + combining -> 'ā' + {{0x0061, 0x0306}, 0x0103}, // 'a' + combining -> 'ă' + {{0x0061, 0x0307}, 0x0227}, // 'a' + combining -> 'ȧ' + {{0x0061, 0x0308}, 0x00E4}, // 'a' + combining -> 'ä' + {{0x0061, 0x0309}, 0x1EA3}, // 'a' + combining -> 'ả' + {{0x0061, 0x030A}, 0x00E5}, // 'a' + combining -> 'å' + {{0x0061, 0x030C}, 0x01CE}, // 'a' + combining -> 'ǎ' + {{0x0061, 0x030F}, 0x0201}, // 'a' + combining -> 'ȁ' + {{0x0061, 0x0311}, 0x0203}, // 'a' + combining -> 'ȃ' + {{0x0061, 0x0323}, 0x1EA1}, // 'a' + combining -> 'ạ' + {{0x0061, 0x0325}, 0x1E01}, // 'a' + combining -> 'ḁ' + {{0x0061, 0x0328}, 0x0105}, // 'a' + combining -> 'ą' + {{0x0062, 0x0307}, 0x1E03}, // 'b' + combining -> 'ḃ' + {{0x0062, 0x0323}, 0x1E05}, // 'b' + combining -> 'ḅ' + {{0x0062, 0x0331}, 0x1E07}, // 'b' + combining -> 'ḇ' + {{0x0063, 0x0301}, 0x0107}, // 'c' + combining -> 'ć' + {{0x0063, 0x0302}, 0x0109}, // 'c' + combining -> 'ĉ' + {{0x0063, 0x0307}, 0x010B}, // 'c' + combining -> 'ċ' + {{0x0063, 0x030C}, 0x010D}, // 'c' + combining -> 'č' + {{0x0063, 0x0327}, 0x00E7}, // 'c' + combining -> 'ç' + {{0x0064, 0x0307}, 0x1E0B}, // 'd' + combining -> 'ḋ' + {{0x0064, 0x030C}, 0x010F}, // 'd' + combining -> 'ď' + {{0x0064, 0x0323}, 0x1E0D}, // 'd' + combining -> 'ḍ' + {{0x0064, 0x0327}, 0x1E11}, // 'd' + combining -> 'ḑ' + {{0x0064, 0x032D}, 0x1E13}, // 'd' + combining -> 'ḓ' + {{0x0064, 0x0331}, 0x1E0F}, // 'd' + combining -> 'ḏ' + {{0x0065, 0x0300}, 0x00E8}, // 'e' + combining -> 'è' + {{0x0065, 0x0301}, 0x00E9}, // 'e' + combining -> 'é' + {{0x0065, 0x0302}, 0x00EA}, // 'e' + combining -> 'ê' + {{0x0065, 0x0303}, 0x1EBD}, // 'e' + combining -> 'ẽ' + {{0x0065, 0x0304}, 0x0113}, // 'e' + combining -> 'ē' + {{0x0065, 0x0306}, 0x0115}, // 'e' + combining -> 'ĕ' + {{0x0065, 0x0307}, 0x0117}, // 'e' + combining -> 'ė' + {{0x0065, 0x0308}, 0x00EB}, // 'e' + combining -> 'ë' + {{0x0065, 0x0309}, 0x1EBB}, // 'e' + combining -> 'ẻ' + {{0x0065, 0x030C}, 0x011B}, // 'e' + combining -> 'ě' + {{0x0065, 0x030F}, 0x0205}, // 'e' + combining -> 'ȅ' + {{0x0065, 0x0311}, 0x0207}, // 'e' + combining -> 'ȇ' + {{0x0065, 0x0323}, 0x1EB9}, // 'e' + combining -> 'ẹ' + {{0x0065, 0x0327}, 0x0229}, // 'e' + combining -> 'ȩ' + {{0x0065, 0x0328}, 0x0119}, // 'e' + combining -> 'ę' + {{0x0065, 0x032D}, 0x1E19}, // 'e' + combining -> 'ḙ' + {{0x0065, 0x0330}, 0x1E1B}, // 'e' + combining -> 'ḛ' + {{0x0066, 0x0307}, 0x1E1F}, // 'f' + combining -> 'ḟ' + {{0x0067, 0x0301}, 0x01F5}, // 'g' + combining -> 'ǵ' + {{0x0067, 0x0302}, 0x011D}, // 'g' + combining -> 'ĝ' + {{0x0067, 0x0304}, 0x1E21}, // 'g' + combining -> 'ḡ' + {{0x0067, 0x0306}, 0x011F}, // 'g' + combining -> 'ğ' + {{0x0067, 0x0307}, 0x0121}, // 'g' + combining -> 'ġ' + {{0x0067, 0x030C}, 0x01E7}, // 'g' + combining -> 'ǧ' + {{0x0067, 0x0327}, 0x0123}, // 'g' + combining -> 'ģ' + {{0x0068, 0x0302}, 0x0125}, // 'h' + combining -> 'ĥ' + {{0x0068, 0x0307}, 0x1E23}, // 'h' + combining -> 'ḣ' + {{0x0068, 0x0308}, 0x1E27}, // 'h' + combining -> 'ḧ' + {{0x0068, 0x030C}, 0x021F}, // 'h' + combining -> 'ȟ' + {{0x0068, 0x0323}, 0x1E25}, // 'h' + combining -> 'ḥ' + {{0x0068, 0x0327}, 0x1E29}, // 'h' + combining -> 'ḩ' + {{0x0068, 0x032E}, 0x1E2B}, // 'h' + combining -> 'ḫ' + {{0x0068, 0x0331}, 0x1E96}, // 'h' + combining -> 'ẖ' + {{0x0069, 0x0300}, 0x00EC}, // 'i' + combining -> 'ì' + {{0x0069, 0x0301}, 0x00ED}, // 'i' + combining -> 'í' + {{0x0069, 0x0302}, 0x00EE}, // 'i' + combining -> 'î' + {{0x0069, 0x0303}, 0x0129}, // 'i' + combining -> 'ĩ' + {{0x0069, 0x0304}, 0x012B}, // 'i' + combining -> 'ī' + {{0x0069, 0x0306}, 0x012D}, // 'i' + combining -> 'ĭ' + {{0x0069, 0x0308}, 0x00EF}, // 'i' + combining -> 'ï' + {{0x0069, 0x0309}, 0x1EC9}, // 'i' + combining -> 'ỉ' + {{0x0069, 0x030C}, 0x01D0}, // 'i' + combining -> 'ǐ' + {{0x0069, 0x030F}, 0x0209}, // 'i' + combining -> 'ȉ' + {{0x0069, 0x0311}, 0x020B}, // 'i' + combining -> 'ȋ' + {{0x0069, 0x0323}, 0x1ECB}, // 'i' + combining -> 'ị' + {{0x0069, 0x0328}, 0x012F}, // 'i' + combining -> 'į' + {{0x0069, 0x0330}, 0x1E2D}, // 'i' + combining -> 'ḭ' + {{0x006A, 0x0302}, 0x0135}, // 'j' + combining -> 'ĵ' + {{0x006A, 0x030C}, 0x01F0}, // 'j' + combining -> 'ǰ' + {{0x006B, 0x0301}, 0x1E31}, // 'k' + combining -> 'ḱ' + {{0x006B, 0x030C}, 0x01E9}, // 'k' + combining -> 'ǩ' + {{0x006B, 0x0323}, 0x1E33}, // 'k' + combining -> 'ḳ' + {{0x006B, 0x0327}, 0x0137}, // 'k' + combining -> 'ķ' + {{0x006B, 0x0331}, 0x1E35}, // 'k' + combining -> 'ḵ' + {{0x006C, 0x0301}, 0x013A}, // 'l' + combining -> 'ĺ' + {{0x006C, 0x030C}, 0x013E}, // 'l' + combining -> 'ľ' + {{0x006C, 0x0323}, 0x1E37}, // 'l' + combining -> 'ḷ' + {{0x006C, 0x0327}, 0x013C}, // 'l' + combining -> 'ļ' + {{0x006C, 0x032D}, 0x1E3D}, // 'l' + combining -> 'ḽ' + {{0x006C, 0x0331}, 0x1E3B}, // 'l' + combining -> 'ḻ' + {{0x006D, 0x0301}, 0x1E3F}, // 'm' + combining -> 'ḿ' + {{0x006D, 0x0307}, 0x1E41}, // 'm' + combining -> 'ṁ' + {{0x006D, 0x0323}, 0x1E43}, // 'm' + combining -> 'ṃ' + {{0x006E, 0x0300}, 0x01F9}, // 'n' + combining -> 'ǹ' + {{0x006E, 0x0301}, 0x0144}, // 'n' + combining -> 'ń' + {{0x006E, 0x0303}, 0x00F1}, // 'n' + combining -> 'ñ' + {{0x006E, 0x0307}, 0x1E45}, // 'n' + combining -> 'ṅ' + {{0x006E, 0x030C}, 0x0148}, // 'n' + combining -> 'ň' + {{0x006E, 0x0323}, 0x1E47}, // 'n' + combining -> 'ṇ' + {{0x006E, 0x0327}, 0x0146}, // 'n' + combining -> 'ņ' + {{0x006E, 0x032D}, 0x1E4B}, // 'n' + combining -> 'ṋ' + {{0x006E, 0x0331}, 0x1E49}, // 'n' + combining -> 'ṉ' + {{0x006F, 0x0300}, 0x00F2}, // 'o' + combining -> 'ò' + {{0x006F, 0x0301}, 0x00F3}, // 'o' + combining -> 'ó' + {{0x006F, 0x0302}, 0x00F4}, // 'o' + combining -> 'ô' + {{0x006F, 0x0303}, 0x00F5}, // 'o' + combining -> 'õ' + {{0x006F, 0x0304}, 0x014D}, // 'o' + combining -> 'ō' + {{0x006F, 0x0306}, 0x014F}, // 'o' + combining -> 'ŏ' + {{0x006F, 0x0307}, 0x022F}, // 'o' + combining -> 'ȯ' + {{0x006F, 0x0308}, 0x00F6}, // 'o' + combining -> 'ö' + {{0x006F, 0x0309}, 0x1ECF}, // 'o' + combining -> 'ỏ' + {{0x006F, 0x030B}, 0x0151}, // 'o' + combining -> 'ő' + {{0x006F, 0x030C}, 0x01D2}, // 'o' + combining -> 'ǒ' + {{0x006F, 0x030F}, 0x020D}, // 'o' + combining -> 'ȍ' + {{0x006F, 0x0311}, 0x020F}, // 'o' + combining -> 'ȏ' + {{0x006F, 0x031B}, 0x01A1}, // 'o' + combining -> 'ơ' + {{0x006F, 0x0323}, 0x1ECD}, // 'o' + combining -> 'ọ' + {{0x006F, 0x0328}, 0x01EB}, // 'o' + combining -> 'ǫ' + {{0x0070, 0x0301}, 0x1E55}, // 'p' + combining -> 'ṕ' + {{0x0070, 0x0307}, 0x1E57}, // 'p' + combining -> 'ṗ' + {{0x0072, 0x0301}, 0x0155}, // 'r' + combining -> 'ŕ' + {{0x0072, 0x0307}, 0x1E59}, // 'r' + combining -> 'ṙ' + {{0x0072, 0x030C}, 0x0159}, // 'r' + combining -> 'ř' + {{0x0072, 0x030F}, 0x0211}, // 'r' + combining -> 'ȑ' + {{0x0072, 0x0311}, 0x0213}, // 'r' + combining -> 'ȓ' + {{0x0072, 0x0323}, 0x1E5B}, // 'r' + combining -> 'ṛ' + {{0x0072, 0x0327}, 0x0157}, // 'r' + combining -> 'ŗ' + {{0x0072, 0x0331}, 0x1E5F}, // 'r' + combining -> 'ṟ' + {{0x0073, 0x0301}, 0x015B}, // 's' + combining -> 'ś' + {{0x0073, 0x0302}, 0x015D}, // 's' + combining -> 'ŝ' + {{0x0073, 0x0307}, 0x1E61}, // 's' + combining -> 'ṡ' + {{0x0073, 0x030C}, 0x0161}, // 's' + combining -> 'š' + {{0x0073, 0x0323}, 0x1E63}, // 's' + combining -> 'ṣ' + {{0x0073, 0x0326}, 0x0219}, // 's' + combining -> 'ș' + {{0x0073, 0x0327}, 0x015F}, // 's' + combining -> 'ş' + {{0x0074, 0x0307}, 0x1E6B}, // 't' + combining -> 'ṫ' + {{0x0074, 0x0308}, 0x1E97}, // 't' + combining -> 'ẗ' + {{0x0074, 0x030C}, 0x0165}, // 't' + combining -> 'ť' + {{0x0074, 0x0323}, 0x1E6D}, // 't' + combining -> 'ṭ' + {{0x0074, 0x0326}, 0x021B}, // 't' + combining -> 'ț' + {{0x0074, 0x0327}, 0x0163}, // 't' + combining -> 'ţ' + {{0x0074, 0x032D}, 0x1E71}, // 't' + combining -> 'ṱ' + {{0x0074, 0x0331}, 0x1E6F}, // 't' + combining -> 'ṯ' + {{0x0075, 0x0300}, 0x00F9}, // 'u' + combining -> 'ù' + {{0x0075, 0x0301}, 0x00FA}, // 'u' + combining -> 'ú' + {{0x0075, 0x0302}, 0x00FB}, // 'u' + combining -> 'û' + {{0x0075, 0x0303}, 0x0169}, // 'u' + combining -> 'ũ' + {{0x0075, 0x0304}, 0x016B}, // 'u' + combining -> 'ū' + {{0x0075, 0x0306}, 0x016D}, // 'u' + combining -> 'ŭ' + {{0x0075, 0x0308}, 0x00FC}, // 'u' + combining -> 'ü' + {{0x0075, 0x0309}, 0x1EE7}, // 'u' + combining -> 'ủ' + {{0x0075, 0x030A}, 0x016F}, // 'u' + combining -> 'ů' + {{0x0075, 0x030B}, 0x0171}, // 'u' + combining -> 'ű' + {{0x0075, 0x030C}, 0x01D4}, // 'u' + combining -> 'ǔ' + {{0x0075, 0x030F}, 0x0215}, // 'u' + combining -> 'ȕ' + {{0x0075, 0x0311}, 0x0217}, // 'u' + combining -> 'ȗ' + {{0x0075, 0x031B}, 0x01B0}, // 'u' + combining -> 'ư' + {{0x0075, 0x0323}, 0x1EE5}, // 'u' + combining -> 'ụ' + {{0x0075, 0x0324}, 0x1E73}, // 'u' + combining -> 'ṳ' + {{0x0075, 0x0328}, 0x0173}, // 'u' + combining -> 'ų' + {{0x0075, 0x032D}, 0x1E77}, // 'u' + combining -> 'ṷ' + {{0x0075, 0x0330}, 0x1E75}, // 'u' + combining -> 'ṵ' + {{0x0076, 0x0303}, 0x1E7D}, // 'v' + combining -> 'ṽ' + {{0x0076, 0x0323}, 0x1E7F}, // 'v' + combining -> 'ṿ' + {{0x0077, 0x0300}, 0x1E81}, // 'w' + combining -> 'ẁ' + {{0x0077, 0x0301}, 0x1E83}, // 'w' + combining -> 'ẃ' + {{0x0077, 0x0302}, 0x0175}, // 'w' + combining -> 'ŵ' + {{0x0077, 0x0307}, 0x1E87}, // 'w' + combining -> 'ẇ' + {{0x0077, 0x0308}, 0x1E85}, // 'w' + combining -> 'ẅ' + {{0x0077, 0x030A}, 0x1E98}, // 'w' + combining -> 'ẘ' + {{0x0077, 0x0323}, 0x1E89}, // 'w' + combining -> 'ẉ' + {{0x0078, 0x0307}, 0x1E8B}, // 'x' + combining -> 'ẋ' + {{0x0078, 0x0308}, 0x1E8D}, // 'x' + combining -> 'ẍ' + {{0x0079, 0x0300}, 0x1EF3}, // 'y' + combining -> 'ỳ' + {{0x0079, 0x0301}, 0x00FD}, // 'y' + combining -> 'ý' + {{0x0079, 0x0302}, 0x0177}, // 'y' + combining -> 'ŷ' + {{0x0079, 0x0303}, 0x1EF9}, // 'y' + combining -> 'ỹ' + {{0x0079, 0x0304}, 0x0233}, // 'y' + combining -> 'ȳ' + {{0x0079, 0x0307}, 0x1E8F}, // 'y' + combining -> 'ẏ' + {{0x0079, 0x0308}, 0x00FF}, // 'y' + combining -> 'ÿ' + {{0x0079, 0x0309}, 0x1EF7}, // 'y' + combining -> 'ỷ' + {{0x0079, 0x030A}, 0x1E99}, // 'y' + combining -> 'ẙ' + {{0x0079, 0x0323}, 0x1EF5}, // 'y' + combining -> 'ỵ' + {{0x007A, 0x0301}, 0x017A}, // 'z' + combining -> 'ź' + {{0x007A, 0x0302}, 0x1E91}, // 'z' + combining -> 'ẑ' + {{0x007A, 0x0307}, 0x017C}, // 'z' + combining -> 'ż' + {{0x007A, 0x030C}, 0x017E}, // 'z' + combining -> 'ž' + {{0x007A, 0x0323}, 0x1E93}, // 'z' + combining -> 'ẓ' + {{0x007A, 0x0331}, 0x1E95}, // 'z' + combining -> 'ẕ' + {{0x00A8, 0x0300}, 0x1FED}, // '¨' + combining -> '῭' + {{0x00A8, 0x0301}, 0x0385}, // '¨' + combining -> '΅' + {{0x00A8, 0x0342}, 0x1FC1}, // '¨' + combining -> '῁' + {{0x00C2, 0x0300}, 0x1EA6}, // 'Â' + combining -> 'Ầ' + {{0x00C2, 0x0301}, 0x1EA4}, // 'Â' + combining -> 'Ấ' + {{0x00C2, 0x0303}, 0x1EAA}, // 'Â' + combining -> 'Ẫ' + {{0x00C2, 0x0309}, 0x1EA8}, // 'Â' + combining -> 'Ẩ' + {{0x00C4, 0x0304}, 0x01DE}, // 'Ä' + combining -> 'Ǟ' + {{0x00C5, 0x0301}, 0x01FA}, // 'Å' + combining -> 'Ǻ' + {{0x00C6, 0x0301}, 0x01FC}, // 'Æ' + combining -> 'Ǽ' + {{0x00C6, 0x0304}, 0x01E2}, // 'Æ' + combining -> 'Ǣ' + {{0x00C7, 0x0301}, 0x1E08}, // 'Ç' + combining -> 'Ḉ' + {{0x00CA, 0x0300}, 0x1EC0}, // 'Ê' + combining -> 'Ề' + {{0x00CA, 0x0301}, 0x1EBE}, // 'Ê' + combining -> 'Ế' + {{0x00CA, 0x0303}, 0x1EC4}, // 'Ê' + combining -> 'Ễ' + {{0x00CA, 0x0309}, 0x1EC2}, // 'Ê' + combining -> 'Ể' + {{0x00CF, 0x0301}, 0x1E2E}, // 'Ï' + combining -> 'Ḯ' + {{0x00D4, 0x0300}, 0x1ED2}, // 'Ô' + combining -> 'Ồ' + {{0x00D4, 0x0301}, 0x1ED0}, // 'Ô' + combining -> 'Ố' + {{0x00D4, 0x0303}, 0x1ED6}, // 'Ô' + combining -> 'Ỗ' + {{0x00D4, 0x0309}, 0x1ED4}, // 'Ô' + combining -> 'Ổ' + {{0x00D5, 0x0301}, 0x1E4C}, // 'Õ' + combining -> 'Ṍ' + {{0x00D5, 0x0304}, 0x022C}, // 'Õ' + combining -> 'Ȭ' + {{0x00D5, 0x0308}, 0x1E4E}, // 'Õ' + combining -> 'Ṏ' + {{0x00D6, 0x0304}, 0x022A}, // 'Ö' + combining -> 'Ȫ' + {{0x00D8, 0x0301}, 0x01FE}, // 'Ø' + combining -> 'Ǿ' + {{0x00DC, 0x0300}, 0x01DB}, // 'Ü' + combining -> 'Ǜ' + {{0x00DC, 0x0301}, 0x01D7}, // 'Ü' + combining -> 'Ǘ' + {{0x00DC, 0x0304}, 0x01D5}, // 'Ü' + combining -> 'Ǖ' + {{0x00DC, 0x030C}, 0x01D9}, // 'Ü' + combining -> 'Ǚ' + {{0x00E2, 0x0300}, 0x1EA7}, // 'â' + combining -> 'ầ' + {{0x00E2, 0x0301}, 0x1EA5}, // 'â' + combining -> 'ấ' + {{0x00E2, 0x0303}, 0x1EAB}, // 'â' + combining -> 'ẫ' + {{0x00E2, 0x0309}, 0x1EA9}, // 'â' + combining -> 'ẩ' + {{0x00E4, 0x0304}, 0x01DF}, // 'ä' + combining -> 'ǟ' + {{0x00E5, 0x0301}, 0x01FB}, // 'å' + combining -> 'ǻ' + {{0x00E6, 0x0301}, 0x01FD}, // 'æ' + combining -> 'ǽ' + {{0x00E6, 0x0304}, 0x01E3}, // 'æ' + combining -> 'ǣ' + {{0x00E7, 0x0301}, 0x1E09}, // 'ç' + combining -> 'ḉ' + {{0x00EA, 0x0300}, 0x1EC1}, // 'ê' + combining -> 'ề' + {{0x00EA, 0x0301}, 0x1EBF}, // 'ê' + combining -> 'ế' + {{0x00EA, 0x0303}, 0x1EC5}, // 'ê' + combining -> 'ễ' + {{0x00EA, 0x0309}, 0x1EC3}, // 'ê' + combining -> 'ể' + {{0x00EF, 0x0301}, 0x1E2F}, // 'ï' + combining -> 'ḯ' + {{0x00F4, 0x0300}, 0x1ED3}, // 'ô' + combining -> 'ồ' + {{0x00F4, 0x0301}, 0x1ED1}, // 'ô' + combining -> 'ố' + {{0x00F4, 0x0303}, 0x1ED7}, // 'ô' + combining -> 'ỗ' + {{0x00F4, 0x0309}, 0x1ED5}, // 'ô' + combining -> 'ổ' + {{0x00F5, 0x0301}, 0x1E4D}, // 'õ' + combining -> 'ṍ' + {{0x00F5, 0x0304}, 0x022D}, // 'õ' + combining -> 'ȭ' + {{0x00F5, 0x0308}, 0x1E4F}, // 'õ' + combining -> 'ṏ' + {{0x00F6, 0x0304}, 0x022B}, // 'ö' + combining -> 'ȫ' + {{0x00F8, 0x0301}, 0x01FF}, // 'ø' + combining -> 'ǿ' + {{0x00FC, 0x0300}, 0x01DC}, // 'ü' + combining -> 'ǜ' + {{0x00FC, 0x0301}, 0x01D8}, // 'ü' + combining -> 'ǘ' + {{0x00FC, 0x0304}, 0x01D6}, // 'ü' + combining -> 'ǖ' + {{0x00FC, 0x030C}, 0x01DA}, // 'ü' + combining -> 'ǚ' + {{0x0102, 0x0300}, 0x1EB0}, // 'Ă' + combining -> 'Ằ' + {{0x0102, 0x0301}, 0x1EAE}, // 'Ă' + combining -> 'Ắ' + {{0x0102, 0x0303}, 0x1EB4}, // 'Ă' + combining -> 'Ẵ' + {{0x0102, 0x0309}, 0x1EB2}, // 'Ă' + combining -> 'Ẳ' + {{0x0103, 0x0300}, 0x1EB1}, // 'ă' + combining -> 'ằ' + {{0x0103, 0x0301}, 0x1EAF}, // 'ă' + combining -> 'ắ' + {{0x0103, 0x0303}, 0x1EB5}, // 'ă' + combining -> 'ẵ' + {{0x0103, 0x0309}, 0x1EB3}, // 'ă' + combining -> 'ẳ' + {{0x0112, 0x0300}, 0x1E14}, // 'Ē' + combining -> 'Ḕ' + {{0x0112, 0x0301}, 0x1E16}, // 'Ē' + combining -> 'Ḗ' + {{0x0113, 0x0300}, 0x1E15}, // 'ē' + combining -> 'ḕ' + {{0x0113, 0x0301}, 0x1E17}, // 'ē' + combining -> 'ḗ' + {{0x014C, 0x0300}, 0x1E50}, // 'Ō' + combining -> 'Ṑ' + {{0x014C, 0x0301}, 0x1E52}, // 'Ō' + combining -> 'Ṓ' + {{0x014D, 0x0300}, 0x1E51}, // 'ō' + combining -> 'ṑ' + {{0x014D, 0x0301}, 0x1E53}, // 'ō' + combining -> 'ṓ' + {{0x015A, 0x0307}, 0x1E64}, // 'Ś' + combining -> 'Ṥ' + {{0x015B, 0x0307}, 0x1E65}, // 'ś' + combining -> 'ṥ' + {{0x0160, 0x0307}, 0x1E66}, // 'Š' + combining -> 'Ṧ' + {{0x0161, 0x0307}, 0x1E67}, // 'š' + combining -> 'ṧ' + {{0x0168, 0x0301}, 0x1E78}, // 'Ũ' + combining -> 'Ṹ' + {{0x0169, 0x0301}, 0x1E79}, // 'ũ' + combining -> 'ṹ' + {{0x016A, 0x0308}, 0x1E7A}, // 'Ū' + combining -> 'Ṻ' + {{0x016B, 0x0308}, 0x1E7B}, // 'ū' + combining -> 'ṻ' + {{0x017F, 0x0307}, 0x1E9B}, // 'ſ' + combining -> 'ẛ' + {{0x01A0, 0x0300}, 0x1EDC}, // 'Ơ' + combining -> 'Ờ' + {{0x01A0, 0x0301}, 0x1EDA}, // 'Ơ' + combining -> 'Ớ' + {{0x01A0, 0x0303}, 0x1EE0}, // 'Ơ' + combining -> 'Ỡ' + {{0x01A0, 0x0309}, 0x1EDE}, // 'Ơ' + combining -> 'Ở' + {{0x01A0, 0x0323}, 0x1EE2}, // 'Ơ' + combining -> 'Ợ' + {{0x01A1, 0x0300}, 0x1EDD}, // 'ơ' + combining -> 'ờ' + {{0x01A1, 0x0301}, 0x1EDB}, // 'ơ' + combining -> 'ớ' + {{0x01A1, 0x0303}, 0x1EE1}, // 'ơ' + combining -> 'ỡ' + {{0x01A1, 0x0309}, 0x1EDF}, // 'ơ' + combining -> 'ở' + {{0x01A1, 0x0323}, 0x1EE3}, // 'ơ' + combining -> 'ợ' + {{0x01AF, 0x0300}, 0x1EEA}, // 'Ư' + combining -> 'Ừ' + {{0x01AF, 0x0301}, 0x1EE8}, // 'Ư' + combining -> 'Ứ' + {{0x01AF, 0x0303}, 0x1EEE}, // 'Ư' + combining -> 'Ữ' + {{0x01AF, 0x0309}, 0x1EEC}, // 'Ư' + combining -> 'Ử' + {{0x01AF, 0x0323}, 0x1EF0}, // 'Ư' + combining -> 'Ự' + {{0x01B0, 0x0300}, 0x1EEB}, // 'ư' + combining -> 'ừ' + {{0x01B0, 0x0301}, 0x1EE9}, // 'ư' + combining -> 'ứ' + {{0x01B0, 0x0303}, 0x1EEF}, // 'ư' + combining -> 'ữ' + {{0x01B0, 0x0309}, 0x1EED}, // 'ư' + combining -> 'ử' + {{0x01B0, 0x0323}, 0x1EF1}, // 'ư' + combining -> 'ự' + {{0x01B7, 0x030C}, 0x01EE}, // 'Ʒ' + combining -> 'Ǯ' + {{0x01EA, 0x0304}, 0x01EC}, // 'Ǫ' + combining -> 'Ǭ' + {{0x01EB, 0x0304}, 0x01ED}, // 'ǫ' + combining -> 'ǭ' + {{0x0226, 0x0304}, 0x01E0}, // 'Ȧ' + combining -> 'Ǡ' + {{0x0227, 0x0304}, 0x01E1}, // 'ȧ' + combining -> 'ǡ' + {{0x0228, 0x0306}, 0x1E1C}, // 'Ȩ' + combining -> 'Ḝ' + {{0x0229, 0x0306}, 0x1E1D}, // 'ȩ' + combining -> 'ḝ' + {{0x022E, 0x0304}, 0x0230}, // 'Ȯ' + combining -> 'Ȱ' + {{0x022F, 0x0304}, 0x0231}, // 'ȯ' + combining -> 'ȱ' + {{0x0292, 0x030C}, 0x01EF}, // 'ʒ' + combining -> 'ǯ' + {{0x0308, 0x0301}, 0x0344}, // '̈' + combining -> '̈́' + {{0x0391, 0x0300}, 0x1FBA}, // 'Α' + combining -> 'Ὰ' + {{0x0391, 0x0301}, 0x0386}, // 'Α' + combining -> 'Ά' + {{0x0391, 0x0304}, 0x1FB9}, // 'Α' + combining -> 'Ᾱ' + {{0x0391, 0x0306}, 0x1FB8}, // 'Α' + combining -> 'Ᾰ' + {{0x0391, 0x0313}, 0x1F08}, // 'Α' + combining -> 'Ἀ' + {{0x0391, 0x0314}, 0x1F09}, // 'Α' + combining -> 'Ἁ' + {{0x0391, 0x0345}, 0x1FBC}, // 'Α' + combining -> 'ᾼ' + {{0x0395, 0x0300}, 0x1FC8}, // 'Ε' + combining -> 'Ὲ' + {{0x0395, 0x0301}, 0x0388}, // 'Ε' + combining -> 'Έ' + {{0x0395, 0x0313}, 0x1F18}, // 'Ε' + combining -> 'Ἐ' + {{0x0395, 0x0314}, 0x1F19}, // 'Ε' + combining -> 'Ἑ' + {{0x0397, 0x0300}, 0x1FCA}, // 'Η' + combining -> 'Ὴ' + {{0x0397, 0x0301}, 0x0389}, // 'Η' + combining -> 'Ή' + {{0x0397, 0x0313}, 0x1F28}, // 'Η' + combining -> 'Ἠ' + {{0x0397, 0x0314}, 0x1F29}, // 'Η' + combining -> 'Ἡ' + {{0x0397, 0x0345}, 0x1FCC}, // 'Η' + combining -> 'ῌ' + {{0x0399, 0x0300}, 0x1FDA}, // 'Ι' + combining -> 'Ὶ' + {{0x0399, 0x0301}, 0x038A}, // 'Ι' + combining -> 'Ί' + {{0x0399, 0x0304}, 0x1FD9}, // 'Ι' + combining -> 'Ῑ' + {{0x0399, 0x0306}, 0x1FD8}, // 'Ι' + combining -> 'Ῐ' + {{0x0399, 0x0308}, 0x03AA}, // 'Ι' + combining -> 'Ϊ' + {{0x0399, 0x0313}, 0x1F38}, // 'Ι' + combining -> 'Ἰ' + {{0x0399, 0x0314}, 0x1F39}, // 'Ι' + combining -> 'Ἱ' + {{0x039F, 0x0300}, 0x1FF8}, // 'Ο' + combining -> 'Ὸ' + {{0x039F, 0x0301}, 0x038C}, // 'Ο' + combining -> 'Ό' + {{0x039F, 0x0313}, 0x1F48}, // 'Ο' + combining -> 'Ὀ' + {{0x039F, 0x0314}, 0x1F49}, // 'Ο' + combining -> 'Ὁ' + {{0x03A1, 0x0314}, 0x1FEC}, // 'Ρ' + combining -> 'Ῥ' + {{0x03A5, 0x0300}, 0x1FEA}, // 'Υ' + combining -> 'Ὺ' + {{0x03A5, 0x0301}, 0x038E}, // 'Υ' + combining -> 'Ύ' + {{0x03A5, 0x0304}, 0x1FE9}, // 'Υ' + combining -> 'Ῡ' + {{0x03A5, 0x0306}, 0x1FE8}, // 'Υ' + combining -> 'Ῠ' + {{0x03A5, 0x0308}, 0x03AB}, // 'Υ' + combining -> 'Ϋ' + {{0x03A5, 0x0314}, 0x1F59}, // 'Υ' + combining -> 'Ὑ' + {{0x03A9, 0x0300}, 0x1FFA}, // 'Ω' + combining -> 'Ὼ' + {{0x03A9, 0x0301}, 0x038F}, // 'Ω' + combining -> 'Ώ' + {{0x03A9, 0x0313}, 0x1F68}, // 'Ω' + combining -> 'Ὠ' + {{0x03A9, 0x0314}, 0x1F69}, // 'Ω' + combining -> 'Ὡ' + {{0x03A9, 0x0345}, 0x1FFC}, // 'Ω' + combining -> 'ῼ' + {{0x03AC, 0x0345}, 0x1FB4}, // 'ά' + combining -> 'ᾴ' + {{0x03AE, 0x0345}, 0x1FC4}, // 'ή' + combining -> 'ῄ' + {{0x03B1, 0x0300}, 0x1F70}, // 'α' + combining -> 'ὰ' + {{0x03B1, 0x0301}, 0x03AC}, // 'α' + combining -> 'ά' + {{0x03B1, 0x0304}, 0x1FB1}, // 'α' + combining -> 'ᾱ' + {{0x03B1, 0x0306}, 0x1FB0}, // 'α' + combining -> 'ᾰ' + {{0x03B1, 0x0313}, 0x1F00}, // 'α' + combining -> 'ἀ' + {{0x03B1, 0x0314}, 0x1F01}, // 'α' + combining -> 'ἁ' + {{0x03B1, 0x0342}, 0x1FB6}, // 'α' + combining -> 'ᾶ' + {{0x03B1, 0x0345}, 0x1FB3}, // 'α' + combining -> 'ᾳ' + {{0x03B5, 0x0300}, 0x1F72}, // 'ε' + combining -> 'ὲ' + {{0x03B5, 0x0301}, 0x03AD}, // 'ε' + combining -> 'έ' + {{0x03B5, 0x0313}, 0x1F10}, // 'ε' + combining -> 'ἐ' + {{0x03B5, 0x0314}, 0x1F11}, // 'ε' + combining -> 'ἑ' + {{0x03B7, 0x0300}, 0x1F74}, // 'η' + combining -> 'ὴ' + {{0x03B7, 0x0301}, 0x03AE}, // 'η' + combining -> 'ή' + {{0x03B7, 0x0313}, 0x1F20}, // 'η' + combining -> 'ἠ' + {{0x03B7, 0x0314}, 0x1F21}, // 'η' + combining -> 'ἡ' + {{0x03B7, 0x0342}, 0x1FC6}, // 'η' + combining -> 'ῆ' + {{0x03B7, 0x0345}, 0x1FC3}, // 'η' + combining -> 'ῃ' + {{0x03B9, 0x0300}, 0x1F76}, // 'ι' + combining -> 'ὶ' + {{0x03B9, 0x0301}, 0x03AF}, // 'ι' + combining -> 'ί' + {{0x03B9, 0x0304}, 0x1FD1}, // 'ι' + combining -> 'ῑ' + {{0x03B9, 0x0306}, 0x1FD0}, // 'ι' + combining -> 'ῐ' + {{0x03B9, 0x0308}, 0x03CA}, // 'ι' + combining -> 'ϊ' + {{0x03B9, 0x0313}, 0x1F30}, // 'ι' + combining -> 'ἰ' + {{0x03B9, 0x0314}, 0x1F31}, // 'ι' + combining -> 'ἱ' + {{0x03B9, 0x0342}, 0x1FD6}, // 'ι' + combining -> 'ῖ' + {{0x03BF, 0x0300}, 0x1F78}, // 'ο' + combining -> 'ὸ' + {{0x03BF, 0x0301}, 0x03CC}, // 'ο' + combining -> 'ό' + {{0x03BF, 0x0313}, 0x1F40}, // 'ο' + combining -> 'ὀ' + {{0x03BF, 0x0314}, 0x1F41}, // 'ο' + combining -> 'ὁ' + {{0x03C1, 0x0313}, 0x1FE4}, // 'ρ' + combining -> 'ῤ' + {{0x03C1, 0x0314}, 0x1FE5}, // 'ρ' + combining -> 'ῥ' + {{0x03C5, 0x0300}, 0x1F7A}, // 'υ' + combining -> 'ὺ' + {{0x03C5, 0x0301}, 0x03CD}, // 'υ' + combining -> 'ύ' + {{0x03C5, 0x0304}, 0x1FE1}, // 'υ' + combining -> 'ῡ' + {{0x03C5, 0x0306}, 0x1FE0}, // 'υ' + combining -> 'ῠ' + {{0x03C5, 0x0308}, 0x03CB}, // 'υ' + combining -> 'ϋ' + {{0x03C5, 0x0313}, 0x1F50}, // 'υ' + combining -> 'ὐ' + {{0x03C5, 0x0314}, 0x1F51}, // 'υ' + combining -> 'ὑ' + {{0x03C5, 0x0342}, 0x1FE6}, // 'υ' + combining -> 'ῦ' + {{0x03C9, 0x0300}, 0x1F7C}, // 'ω' + combining -> 'ὼ' + {{0x03C9, 0x0301}, 0x03CE}, // 'ω' + combining -> 'ώ' + {{0x03C9, 0x0313}, 0x1F60}, // 'ω' + combining -> 'ὠ' + {{0x03C9, 0x0314}, 0x1F61}, // 'ω' + combining -> 'ὡ' + {{0x03C9, 0x0342}, 0x1FF6}, // 'ω' + combining -> 'ῶ' + {{0x03C9, 0x0345}, 0x1FF3}, // 'ω' + combining -> 'ῳ' + {{0x03CA, 0x0300}, 0x1FD2}, // 'ϊ' + combining -> 'ῒ' + {{0x03CA, 0x0301}, 0x0390}, // 'ϊ' + combining -> 'ΐ' + {{0x03CA, 0x0342}, 0x1FD7}, // 'ϊ' + combining -> 'ῗ' + {{0x03CB, 0x0300}, 0x1FE2}, // 'ϋ' + combining -> 'ῢ' + {{0x03CB, 0x0301}, 0x03B0}, // 'ϋ' + combining -> 'ΰ' + {{0x03CB, 0x0342}, 0x1FE7}, // 'ϋ' + combining -> 'ῧ' + {{0x03CE, 0x0345}, 0x1FF4}, // 'ώ' + combining -> 'ῴ' + {{0x03D2, 0x0301}, 0x03D3}, // 'ϒ' + combining -> 'ϓ' + {{0x03D2, 0x0308}, 0x03D4}, // 'ϒ' + combining -> 'ϔ' + {{0x0406, 0x0308}, 0x0407}, // 'І' + combining -> 'Ї' + {{0x0410, 0x0306}, 0x04D0}, // 'А' + combining -> 'Ӑ' + {{0x0410, 0x0308}, 0x04D2}, // 'А' + combining -> 'Ӓ' + {{0x0413, 0x0301}, 0x0403}, // 'Г' + combining -> 'Ѓ' + {{0x0415, 0x0300}, 0x0400}, // 'Е' + combining -> 'Ѐ' + {{0x0415, 0x0306}, 0x04D6}, // 'Е' + combining -> 'Ӗ' + {{0x0415, 0x0308}, 0x0401}, // 'Е' + combining -> 'Ё' + {{0x0416, 0x0306}, 0x04C1}, // 'Ж' + combining -> 'Ӂ' + {{0x0416, 0x0308}, 0x04DC}, // 'Ж' + combining -> 'Ӝ' + {{0x0417, 0x0308}, 0x04DE}, // 'З' + combining -> 'Ӟ' + {{0x0418, 0x0300}, 0x040D}, // 'И' + combining -> 'Ѝ' + {{0x0418, 0x0304}, 0x04E2}, // 'И' + combining -> 'Ӣ' + {{0x0418, 0x0306}, 0x0419}, // 'И' + combining -> 'Й' + {{0x0418, 0x0308}, 0x04E4}, // 'И' + combining -> 'Ӥ' + {{0x041A, 0x0301}, 0x040C}, // 'К' + combining -> 'Ќ' + {{0x041E, 0x0308}, 0x04E6}, // 'О' + combining -> 'Ӧ' + {{0x0423, 0x0304}, 0x04EE}, // 'У' + combining -> 'Ӯ' + {{0x0423, 0x0306}, 0x040E}, // 'У' + combining -> 'Ў' + {{0x0423, 0x0308}, 0x04F0}, // 'У' + combining -> 'Ӱ' + {{0x0423, 0x030B}, 0x04F2}, // 'У' + combining -> 'Ӳ' + {{0x0427, 0x0308}, 0x04F4}, // 'Ч' + combining -> 'Ӵ' + {{0x042B, 0x0308}, 0x04F8}, // 'Ы' + combining -> 'Ӹ' + {{0x042D, 0x0308}, 0x04EC}, // 'Э' + combining -> 'Ӭ' + {{0x0430, 0x0306}, 0x04D1}, // 'а' + combining -> 'ӑ' + {{0x0430, 0x0308}, 0x04D3}, // 'а' + combining -> 'ӓ' + {{0x0433, 0x0301}, 0x0453}, // 'г' + combining -> 'ѓ' + {{0x0435, 0x0300}, 0x0450}, // 'е' + combining -> 'ѐ' + {{0x0435, 0x0306}, 0x04D7}, // 'е' + combining -> 'ӗ' + {{0x0435, 0x0308}, 0x0451}, // 'е' + combining -> 'ё' + {{0x0436, 0x0306}, 0x04C2}, // 'ж' + combining -> 'ӂ' + {{0x0436, 0x0308}, 0x04DD}, // 'ж' + combining -> 'ӝ' + {{0x0437, 0x0308}, 0x04DF}, // 'з' + combining -> 'ӟ' + {{0x0438, 0x0300}, 0x045D}, // 'и' + combining -> 'ѝ' + {{0x0438, 0x0304}, 0x04E3}, // 'и' + combining -> 'ӣ' + {{0x0438, 0x0306}, 0x0439}, // 'и' + combining -> 'й' + {{0x0438, 0x0308}, 0x04E5}, // 'и' + combining -> 'ӥ' + {{0x043A, 0x0301}, 0x045C}, // 'к' + combining -> 'ќ' + {{0x043E, 0x0308}, 0x04E7}, // 'о' + combining -> 'ӧ' + {{0x0443, 0x0304}, 0x04EF}, // 'у' + combining -> 'ӯ' + {{0x0443, 0x0306}, 0x045E}, // 'у' + combining -> 'ў' + {{0x0443, 0x0308}, 0x04F1}, // 'у' + combining -> 'ӱ' + {{0x0443, 0x030B}, 0x04F3}, // 'у' + combining -> 'ӳ' + {{0x0447, 0x0308}, 0x04F5}, // 'ч' + combining -> 'ӵ' + {{0x044B, 0x0308}, 0x04F9}, // 'ы' + combining -> 'ӹ' + {{0x044D, 0x0308}, 0x04ED}, // 'э' + combining -> 'ӭ' + {{0x0456, 0x0308}, 0x0457}, // 'і' + combining -> 'ї' + {{0x0474, 0x030F}, 0x0476}, // 'Ѵ' + combining -> 'Ѷ' + {{0x0475, 0x030F}, 0x0477}, // 'ѵ' + combining -> 'ѷ' + {{0x04D8, 0x0308}, 0x04DA}, // 'Ә' + combining -> 'Ӛ' + {{0x04D9, 0x0308}, 0x04DB}, // 'ә' + combining -> 'ӛ' + {{0x04E8, 0x0308}, 0x04EA}, // 'Ө' + combining -> 'Ӫ' + {{0x04E9, 0x0308}, 0x04EB}, // 'ө' + combining -> 'ӫ' + {{0x05D0, 0x05B7}, 0xFB2E}, // 'א' + combining -> 'אַ' + {{0x05D0, 0x05B8}, 0xFB2F}, // 'א' + combining -> 'אָ' + {{0x05D0, 0x05BC}, 0xFB30}, // 'א' + combining -> 'אּ' + {{0x05D1, 0x05BC}, 0xFB31}, // 'ב' + combining -> 'בּ' + {{0x05D1, 0x05BF}, 0xFB4C}, // 'ב' + combining -> 'בֿ' + {{0x05D2, 0x05BC}, 0xFB32}, // 'ג' + combining -> 'גּ' + {{0x05D3, 0x05BC}, 0xFB33}, // 'ד' + combining -> 'דּ' + {{0x05D4, 0x05BC}, 0xFB34}, // 'ה' + combining -> 'הּ' + {{0x05D5, 0x05B9}, 0xFB4B}, // 'ו' + combining -> 'וֹ' + {{0x05D5, 0x05BC}, 0xFB35}, // 'ו' + combining -> 'וּ' + {{0x05D6, 0x05BC}, 0xFB36}, // 'ז' + combining -> 'זּ' + {{0x05D8, 0x05BC}, 0xFB38}, // 'ט' + combining -> 'טּ' + {{0x05D9, 0x05B4}, 0xFB1D}, // 'י' + combining -> 'יִ' + {{0x05D9, 0x05BC}, 0xFB39}, // 'י' + combining -> 'יּ' + {{0x05DA, 0x05BC}, 0xFB3A}, // 'ך' + combining -> 'ךּ' + {{0x05DB, 0x05BC}, 0xFB3B}, // 'כ' + combining -> 'כּ' + {{0x05DB, 0x05BF}, 0xFB4D}, // 'כ' + combining -> 'כֿ' + {{0x05DC, 0x05BC}, 0xFB3C}, // 'ל' + combining -> 'לּ' + {{0x05DE, 0x05BC}, 0xFB3E}, // 'מ' + combining -> 'מּ' + {{0x05E0, 0x05BC}, 0xFB40}, // 'נ' + combining -> 'נּ' + {{0x05E1, 0x05BC}, 0xFB41}, // 'ס' + combining -> 'סּ' + {{0x05E3, 0x05BC}, 0xFB43}, // 'ף' + combining -> 'ףּ' + {{0x05E4, 0x05BC}, 0xFB44}, // 'פ' + combining -> 'פּ' + {{0x05E4, 0x05BF}, 0xFB4E}, // 'פ' + combining -> 'פֿ' + {{0x05E6, 0x05BC}, 0xFB46}, // 'צ' + combining -> 'צּ' + {{0x05E7, 0x05BC}, 0xFB47}, // 'ק' + combining -> 'קּ' + {{0x05E8, 0x05BC}, 0xFB48}, // 'ר' + combining -> 'רּ' + {{0x05E9, 0x05BC}, 0xFB49}, // 'ש' + combining -> 'שּ' + {{0x05E9, 0x05C1}, 0xFB2A}, // 'ש' + combining -> 'שׁ' + {{0x05E9, 0x05C2}, 0xFB2B}, // 'ש' + combining -> 'שׂ' + {{0x05EA, 0x05BC}, 0xFB4A}, // 'ת' + combining -> 'תּ' + {{0x05F2, 0x05B7}, 0xFB1F}, // 'ײ' + combining -> 'ײַ' + {{0x0627, 0x0653}, 0x0622}, // 'ا' + combining -> 'آ' + {{0x0627, 0x0654}, 0x0623}, // 'ا' + combining -> 'أ' + {{0x0627, 0x0655}, 0x0625}, // 'ا' + combining -> 'إ' + {{0x0648, 0x0654}, 0x0624}, // 'و' + combining -> 'ؤ' + {{0x064A, 0x0654}, 0x0626}, // 'ي' + combining -> 'ئ' + {{0x06C1, 0x0654}, 0x06C2}, // 'ہ' + combining -> 'ۂ' + {{0x06D2, 0x0654}, 0x06D3}, // 'ے' + combining -> 'ۓ' + {{0x06D5, 0x0654}, 0x06C0}, // 'ە' + combining -> 'ۀ' + {{0x0915, 0x093C}, 0x0958}, // 'क' + combining -> 'क़' + {{0x0916, 0x093C}, 0x0959}, // 'ख' + combining -> 'ख़' + {{0x0917, 0x093C}, 0x095A}, // 'ग' + combining -> 'ग़' + {{0x091C, 0x093C}, 0x095B}, // 'ज' + combining -> 'ज़' + {{0x0921, 0x093C}, 0x095C}, // 'ड' + combining -> 'ड़' + {{0x0922, 0x093C}, 0x095D}, // 'ढ' + combining -> 'ढ़' + {{0x0928, 0x093C}, 0x0929}, // 'न' + combining -> 'ऩ' + {{0x092B, 0x093C}, 0x095E}, // 'फ' + combining -> 'फ़' + {{0x092F, 0x093C}, 0x095F}, // 'य' + combining -> 'य़' + {{0x0930, 0x093C}, 0x0931}, // 'र' + combining -> 'ऱ' + {{0x0933, 0x093C}, 0x0934}, // 'ळ' + combining -> 'ऴ' + {{0x09A1, 0x09BC}, 0x09DC}, // 'ড' + combining -> 'ড়' + {{0x09A2, 0x09BC}, 0x09DD}, // 'ঢ' + combining -> 'ঢ়' + {{0x09AF, 0x09BC}, 0x09DF}, // 'য' + combining -> 'য়' + {{0x09C7, 0x09BE}, 0x09CB}, // 'ে' + combining -> 'ো' + {{0x09C7, 0x09D7}, 0x09CC}, // 'ে' + combining -> 'ৌ' + {{0x0A16, 0x0A3C}, 0x0A59}, // 'ਖ' + combining -> 'ਖ਼' + {{0x0A17, 0x0A3C}, 0x0A5A}, // 'ਗ' + combining -> 'ਗ਼' + {{0x0A1C, 0x0A3C}, 0x0A5B}, // 'ਜ' + combining -> 'ਜ਼' + {{0x0A2B, 0x0A3C}, 0x0A5E}, // 'ਫ' + combining -> 'ਫ਼' + {{0x0A32, 0x0A3C}, 0x0A33}, // 'ਲ' + combining -> 'ਲ਼' + {{0x0A38, 0x0A3C}, 0x0A36}, // 'ਸ' + combining -> 'ਸ਼' + {{0x0B21, 0x0B3C}, 0x0B5C}, // 'ଡ' + combining -> 'ଡ଼' + {{0x0B22, 0x0B3C}, 0x0B5D}, // 'ଢ' + combining -> 'ଢ଼' + {{0x0B47, 0x0B3E}, 0x0B4B}, // 'େ' + combining -> 'ୋ' + {{0x0B47, 0x0B56}, 0x0B48}, // 'େ' + combining -> 'ୈ' + {{0x0B47, 0x0B57}, 0x0B4C}, // 'େ' + combining -> 'ୌ' + {{0x0B92, 0x0BD7}, 0x0B94}, // 'ஒ' + combining -> 'ஔ' + {{0x0BC6, 0x0BBE}, 0x0BCA}, // 'ெ' + combining -> 'ொ' + {{0x0BC6, 0x0BD7}, 0x0BCC}, // 'ெ' + combining -> 'ௌ' + {{0x0BC7, 0x0BBE}, 0x0BCB}, // 'ே' + combining -> 'ோ' + {{0x0C46, 0x0C56}, 0x0C48}, // 'ె' + combining -> 'ై' + {{0x0CBF, 0x0CD5}, 0x0CC0}, // 'ಿ' + combining -> 'ೀ' + {{0x0CC6, 0x0CC2}, 0x0CCA}, // 'ೆ' + combining -> 'ೊ' + {{0x0CC6, 0x0CD5}, 0x0CC7}, // 'ೆ' + combining -> 'ೇ' + {{0x0CC6, 0x0CD6}, 0x0CC8}, // 'ೆ' + combining -> 'ೈ' + {{0x0CCA, 0x0CD5}, 0x0CCB}, // 'ೊ' + combining -> 'ೋ' + {{0x0D46, 0x0D3E}, 0x0D4A}, // 'െ' + combining -> 'ൊ' + {{0x0D46, 0x0D57}, 0x0D4C}, // 'െ' + combining -> 'ൌ' + {{0x0D47, 0x0D3E}, 0x0D4B}, // 'േ' + combining -> 'ോ' + {{0x0DD9, 0x0DCA}, 0x0DDA}, // 'ෙ' + combining -> 'ේ' + {{0x0DD9, 0x0DCF}, 0x0DDC}, // 'ෙ' + combining -> 'ො' + {{0x0DD9, 0x0DDF}, 0x0DDE}, // 'ෙ' + combining -> 'ෞ' + {{0x0DDC, 0x0DCA}, 0x0DDD}, // 'ො' + combining -> 'ෝ' + {{0x0F40, 0x0FB5}, 0x0F69}, // 'ཀ' + combining -> 'ཀྵ' + {{0x0F42, 0x0FB7}, 0x0F43}, // 'ག' + combining -> 'གྷ' + {{0x0F4C, 0x0FB7}, 0x0F4D}, // 'ཌ' + combining -> 'ཌྷ' + {{0x0F51, 0x0FB7}, 0x0F52}, // 'ད' + combining -> 'དྷ' + {{0x0F56, 0x0FB7}, 0x0F57}, // 'བ' + combining -> 'བྷ' + {{0x0F5B, 0x0FB7}, 0x0F5C}, // 'ཛ' + combining -> 'ཛྷ' + {{0x0F71, 0x0F72}, 0x0F73}, // 'ཱ' + combining -> 'ཱི' + {{0x0F71, 0x0F74}, 0x0F75}, // 'ཱ' + combining -> 'ཱུ' + {{0x0F71, 0x0F80}, 0x0F81}, // 'ཱ' + combining -> 'ཱྀ' + {{0x0F90, 0x0FB5}, 0x0FB9}, // 'ྐ' + combining -> 'ྐྵ' + {{0x0F92, 0x0FB7}, 0x0F93}, // 'ྒ' + combining -> 'ྒྷ' + {{0x0F9C, 0x0FB7}, 0x0F9D}, // 'ྜ' + combining -> 'ྜྷ' + {{0x0FA1, 0x0FB7}, 0x0FA2}, // 'ྡ' + combining -> 'ྡྷ' + {{0x0FA6, 0x0FB7}, 0x0FA7}, // 'ྦ' + combining -> 'ྦྷ' + {{0x0FAB, 0x0FB7}, 0x0FAC}, // 'ྫ' + combining -> 'ྫྷ' + {{0x0FB2, 0x0F80}, 0x0F76}, // 'ྲ' + combining -> 'ྲྀ' + {{0x0FB3, 0x0F80}, 0x0F78}, // 'ླ' + combining -> 'ླྀ' + {{0x1025, 0x102E}, 0x1026}, // 'ဥ' + combining -> 'ဦ' + {{0x1B05, 0x1B35}, 0x1B06}, // 'ᬅ' + combining -> 'ᬆ' + {{0x1B07, 0x1B35}, 0x1B08}, // 'ᬇ' + combining -> 'ᬈ' + {{0x1B09, 0x1B35}, 0x1B0A}, // 'ᬉ' + combining -> 'ᬊ' + {{0x1B0B, 0x1B35}, 0x1B0C}, // 'ᬋ' + combining -> 'ᬌ' + {{0x1B0D, 0x1B35}, 0x1B0E}, // 'ᬍ' + combining -> 'ᬎ' + {{0x1B11, 0x1B35}, 0x1B12}, // 'ᬑ' + combining -> 'ᬒ' + {{0x1B3A, 0x1B35}, 0x1B3B}, // 'ᬺ' + combining -> 'ᬻ' + {{0x1B3C, 0x1B35}, 0x1B3D}, // 'ᬼ' + combining -> 'ᬽ' + {{0x1B3E, 0x1B35}, 0x1B40}, // 'ᬾ' + combining -> 'ᭀ' + {{0x1B3F, 0x1B35}, 0x1B41}, // 'ᬿ' + combining -> 'ᭁ' + {{0x1B42, 0x1B35}, 0x1B43}, // 'ᭂ' + combining -> 'ᭃ' + {{0x1E36, 0x0304}, 0x1E38}, // 'Ḷ' + combining -> 'Ḹ' + {{0x1E37, 0x0304}, 0x1E39}, // 'ḷ' + combining -> 'ḹ' + {{0x1E5A, 0x0304}, 0x1E5C}, // 'Ṛ' + combining -> 'Ṝ' + {{0x1E5B, 0x0304}, 0x1E5D}, // 'ṛ' + combining -> 'ṝ' + {{0x1E62, 0x0307}, 0x1E68}, // 'Ṣ' + combining -> 'Ṩ' + {{0x1E63, 0x0307}, 0x1E69}, // 'ṣ' + combining -> 'ṩ' + {{0x1EA0, 0x0302}, 0x1EAC}, // 'Ạ' + combining -> 'Ậ' + {{0x1EA0, 0x0306}, 0x1EB6}, // 'Ạ' + combining -> 'Ặ' + {{0x1EA1, 0x0302}, 0x1EAD}, // 'ạ' + combining -> 'ậ' + {{0x1EA1, 0x0306}, 0x1EB7}, // 'ạ' + combining -> 'ặ' + {{0x1EB8, 0x0302}, 0x1EC6}, // 'Ẹ' + combining -> 'Ệ' + {{0x1EB9, 0x0302}, 0x1EC7}, // 'ẹ' + combining -> 'ệ' + {{0x1ECC, 0x0302}, 0x1ED8}, // 'Ọ' + combining -> 'Ộ' + {{0x1ECD, 0x0302}, 0x1ED9}, // 'ọ' + combining -> 'ộ' + {{0x1F00, 0x0300}, 0x1F02}, // 'ἀ' + combining -> 'ἂ' + {{0x1F00, 0x0301}, 0x1F04}, // 'ἀ' + combining -> 'ἄ' + {{0x1F00, 0x0342}, 0x1F06}, // 'ἀ' + combining -> 'ἆ' + {{0x1F00, 0x0345}, 0x1F80}, // 'ἀ' + combining -> 'ᾀ' + {{0x1F01, 0x0300}, 0x1F03}, // 'ἁ' + combining -> 'ἃ' + {{0x1F01, 0x0301}, 0x1F05}, // 'ἁ' + combining -> 'ἅ' + {{0x1F01, 0x0342}, 0x1F07}, // 'ἁ' + combining -> 'ἇ' + {{0x1F01, 0x0345}, 0x1F81}, // 'ἁ' + combining -> 'ᾁ' + {{0x1F02, 0x0345}, 0x1F82}, // 'ἂ' + combining -> 'ᾂ' + {{0x1F03, 0x0345}, 0x1F83}, // 'ἃ' + combining -> 'ᾃ' + {{0x1F04, 0x0345}, 0x1F84}, // 'ἄ' + combining -> 'ᾄ' + {{0x1F05, 0x0345}, 0x1F85}, // 'ἅ' + combining -> 'ᾅ' + {{0x1F06, 0x0345}, 0x1F86}, // 'ἆ' + combining -> 'ᾆ' + {{0x1F07, 0x0345}, 0x1F87}, // 'ἇ' + combining -> 'ᾇ' + {{0x1F08, 0x0300}, 0x1F0A}, // 'Ἀ' + combining -> 'Ἂ' + {{0x1F08, 0x0301}, 0x1F0C}, // 'Ἀ' + combining -> 'Ἄ' + {{0x1F08, 0x0342}, 0x1F0E}, // 'Ἀ' + combining -> 'Ἆ' + {{0x1F08, 0x0345}, 0x1F88}, // 'Ἀ' + combining -> 'ᾈ' + {{0x1F09, 0x0300}, 0x1F0B}, // 'Ἁ' + combining -> 'Ἃ' + {{0x1F09, 0x0301}, 0x1F0D}, // 'Ἁ' + combining -> 'Ἅ' + {{0x1F09, 0x0342}, 0x1F0F}, // 'Ἁ' + combining -> 'Ἇ' + {{0x1F09, 0x0345}, 0x1F89}, // 'Ἁ' + combining -> 'ᾉ' + {{0x1F0A, 0x0345}, 0x1F8A}, // 'Ἂ' + combining -> 'ᾊ' + {{0x1F0B, 0x0345}, 0x1F8B}, // 'Ἃ' + combining -> 'ᾋ' + {{0x1F0C, 0x0345}, 0x1F8C}, // 'Ἄ' + combining -> 'ᾌ' + {{0x1F0D, 0x0345}, 0x1F8D}, // 'Ἅ' + combining -> 'ᾍ' + {{0x1F0E, 0x0345}, 0x1F8E}, // 'Ἆ' + combining -> 'ᾎ' + {{0x1F0F, 0x0345}, 0x1F8F}, // 'Ἇ' + combining -> 'ᾏ' + {{0x1F10, 0x0300}, 0x1F12}, // 'ἐ' + combining -> 'ἒ' + {{0x1F10, 0x0301}, 0x1F14}, // 'ἐ' + combining -> 'ἔ' + {{0x1F11, 0x0300}, 0x1F13}, // 'ἑ' + combining -> 'ἓ' + {{0x1F11, 0x0301}, 0x1F15}, // 'ἑ' + combining -> 'ἕ' + {{0x1F18, 0x0300}, 0x1F1A}, // 'Ἐ' + combining -> 'Ἒ' + {{0x1F18, 0x0301}, 0x1F1C}, // 'Ἐ' + combining -> 'Ἔ' + {{0x1F19, 0x0300}, 0x1F1B}, // 'Ἑ' + combining -> 'Ἓ' + {{0x1F19, 0x0301}, 0x1F1D}, // 'Ἑ' + combining -> 'Ἕ' + {{0x1F20, 0x0300}, 0x1F22}, // 'ἠ' + combining -> 'ἢ' + {{0x1F20, 0x0301}, 0x1F24}, // 'ἠ' + combining -> 'ἤ' + {{0x1F20, 0x0342}, 0x1F26}, // 'ἠ' + combining -> 'ἦ' + {{0x1F20, 0x0345}, 0x1F90}, // 'ἠ' + combining -> 'ᾐ' + {{0x1F21, 0x0300}, 0x1F23}, // 'ἡ' + combining -> 'ἣ' + {{0x1F21, 0x0301}, 0x1F25}, // 'ἡ' + combining -> 'ἥ' + {{0x1F21, 0x0342}, 0x1F27}, // 'ἡ' + combining -> 'ἧ' + {{0x1F21, 0x0345}, 0x1F91}, // 'ἡ' + combining -> 'ᾑ' + {{0x1F22, 0x0345}, 0x1F92}, // 'ἢ' + combining -> 'ᾒ' + {{0x1F23, 0x0345}, 0x1F93}, // 'ἣ' + combining -> 'ᾓ' + {{0x1F24, 0x0345}, 0x1F94}, // 'ἤ' + combining -> 'ᾔ' + {{0x1F25, 0x0345}, 0x1F95}, // 'ἥ' + combining -> 'ᾕ' + {{0x1F26, 0x0345}, 0x1F96}, // 'ἦ' + combining -> 'ᾖ' + {{0x1F27, 0x0345}, 0x1F97}, // 'ἧ' + combining -> 'ᾗ' + {{0x1F28, 0x0300}, 0x1F2A}, // 'Ἠ' + combining -> 'Ἢ' + {{0x1F28, 0x0301}, 0x1F2C}, // 'Ἠ' + combining -> 'Ἤ' + {{0x1F28, 0x0342}, 0x1F2E}, // 'Ἠ' + combining -> 'Ἦ' + {{0x1F28, 0x0345}, 0x1F98}, // 'Ἠ' + combining -> 'ᾘ' + {{0x1F29, 0x0300}, 0x1F2B}, // 'Ἡ' + combining -> 'Ἣ' + {{0x1F29, 0x0301}, 0x1F2D}, // 'Ἡ' + combining -> 'Ἥ' + {{0x1F29, 0x0342}, 0x1F2F}, // 'Ἡ' + combining -> 'Ἧ' + {{0x1F29, 0x0345}, 0x1F99}, // 'Ἡ' + combining -> 'ᾙ' + {{0x1F2A, 0x0345}, 0x1F9A}, // 'Ἢ' + combining -> 'ᾚ' + {{0x1F2B, 0x0345}, 0x1F9B}, // 'Ἣ' + combining -> 'ᾛ' + {{0x1F2C, 0x0345}, 0x1F9C}, // 'Ἤ' + combining -> 'ᾜ' + {{0x1F2D, 0x0345}, 0x1F9D}, // 'Ἥ' + combining -> 'ᾝ' + {{0x1F2E, 0x0345}, 0x1F9E}, // 'Ἦ' + combining -> 'ᾞ' + {{0x1F2F, 0x0345}, 0x1F9F}, // 'Ἧ' + combining -> 'ᾟ' + {{0x1F30, 0x0300}, 0x1F32}, // 'ἰ' + combining -> 'ἲ' + {{0x1F30, 0x0301}, 0x1F34}, // 'ἰ' + combining -> 'ἴ' + {{0x1F30, 0x0342}, 0x1F36}, // 'ἰ' + combining -> 'ἶ' + {{0x1F31, 0x0300}, 0x1F33}, // 'ἱ' + combining -> 'ἳ' + {{0x1F31, 0x0301}, 0x1F35}, // 'ἱ' + combining -> 'ἵ' + {{0x1F31, 0x0342}, 0x1F37}, // 'ἱ' + combining -> 'ἷ' + {{0x1F38, 0x0300}, 0x1F3A}, // 'Ἰ' + combining -> 'Ἲ' + {{0x1F38, 0x0301}, 0x1F3C}, // 'Ἰ' + combining -> 'Ἴ' + {{0x1F38, 0x0342}, 0x1F3E}, // 'Ἰ' + combining -> 'Ἶ' + {{0x1F39, 0x0300}, 0x1F3B}, // 'Ἱ' + combining -> 'Ἳ' + {{0x1F39, 0x0301}, 0x1F3D}, // 'Ἱ' + combining -> 'Ἵ' + {{0x1F39, 0x0342}, 0x1F3F}, // 'Ἱ' + combining -> 'Ἷ' + {{0x1F40, 0x0300}, 0x1F42}, // 'ὀ' + combining -> 'ὂ' + {{0x1F40, 0x0301}, 0x1F44}, // 'ὀ' + combining -> 'ὄ' + {{0x1F41, 0x0300}, 0x1F43}, // 'ὁ' + combining -> 'ὃ' + {{0x1F41, 0x0301}, 0x1F45}, // 'ὁ' + combining -> 'ὅ' + {{0x1F48, 0x0300}, 0x1F4A}, // 'Ὀ' + combining -> 'Ὂ' + {{0x1F48, 0x0301}, 0x1F4C}, // 'Ὀ' + combining -> 'Ὄ' + {{0x1F49, 0x0300}, 0x1F4B}, // 'Ὁ' + combining -> 'Ὃ' + {{0x1F49, 0x0301}, 0x1F4D}, // 'Ὁ' + combining -> 'Ὅ' + {{0x1F50, 0x0300}, 0x1F52}, // 'ὐ' + combining -> 'ὒ' + {{0x1F50, 0x0301}, 0x1F54}, // 'ὐ' + combining -> 'ὔ' + {{0x1F50, 0x0342}, 0x1F56}, // 'ὐ' + combining -> 'ὖ' + {{0x1F51, 0x0300}, 0x1F53}, // 'ὑ' + combining -> 'ὓ' + {{0x1F51, 0x0301}, 0x1F55}, // 'ὑ' + combining -> 'ὕ' + {{0x1F51, 0x0342}, 0x1F57}, // 'ὑ' + combining -> 'ὗ' + {{0x1F59, 0x0300}, 0x1F5B}, // 'Ὑ' + combining -> 'Ὓ' + {{0x1F59, 0x0301}, 0x1F5D}, // 'Ὑ' + combining -> 'Ὕ' + {{0x1F59, 0x0342}, 0x1F5F}, // 'Ὑ' + combining -> 'Ὗ' + {{0x1F60, 0x0300}, 0x1F62}, // 'ὠ' + combining -> 'ὢ' + {{0x1F60, 0x0301}, 0x1F64}, // 'ὠ' + combining -> 'ὤ' + {{0x1F60, 0x0342}, 0x1F66}, // 'ὠ' + combining -> 'ὦ' + {{0x1F60, 0x0345}, 0x1FA0}, // 'ὠ' + combining -> 'ᾠ' + {{0x1F61, 0x0300}, 0x1F63}, // 'ὡ' + combining -> 'ὣ' + {{0x1F61, 0x0301}, 0x1F65}, // 'ὡ' + combining -> 'ὥ' + {{0x1F61, 0x0342}, 0x1F67}, // 'ὡ' + combining -> 'ὧ' + {{0x1F61, 0x0345}, 0x1FA1}, // 'ὡ' + combining -> 'ᾡ' + {{0x1F62, 0x0345}, 0x1FA2}, // 'ὢ' + combining -> 'ᾢ' + {{0x1F63, 0x0345}, 0x1FA3}, // 'ὣ' + combining -> 'ᾣ' + {{0x1F64, 0x0345}, 0x1FA4}, // 'ὤ' + combining -> 'ᾤ' + {{0x1F65, 0x0345}, 0x1FA5}, // 'ὥ' + combining -> 'ᾥ' + {{0x1F66, 0x0345}, 0x1FA6}, // 'ὦ' + combining -> 'ᾦ' + {{0x1F67, 0x0345}, 0x1FA7}, // 'ὧ' + combining -> 'ᾧ' + {{0x1F68, 0x0300}, 0x1F6A}, // 'Ὠ' + combining -> 'Ὢ' + {{0x1F68, 0x0301}, 0x1F6C}, // 'Ὠ' + combining -> 'Ὤ' + {{0x1F68, 0x0342}, 0x1F6E}, // 'Ὠ' + combining -> 'Ὦ' + {{0x1F68, 0x0345}, 0x1FA8}, // 'Ὠ' + combining -> 'ᾨ' + {{0x1F69, 0x0300}, 0x1F6B}, // 'Ὡ' + combining -> 'Ὣ' + {{0x1F69, 0x0301}, 0x1F6D}, // 'Ὡ' + combining -> 'Ὥ' + {{0x1F69, 0x0342}, 0x1F6F}, // 'Ὡ' + combining -> 'Ὧ' + {{0x1F69, 0x0345}, 0x1FA9}, // 'Ὡ' + combining -> 'ᾩ' + {{0x1F6A, 0x0345}, 0x1FAA}, // 'Ὢ' + combining -> 'ᾪ' + {{0x1F6B, 0x0345}, 0x1FAB}, // 'Ὣ' + combining -> 'ᾫ' + {{0x1F6C, 0x0345}, 0x1FAC}, // 'Ὤ' + combining -> 'ᾬ' + {{0x1F6D, 0x0345}, 0x1FAD}, // 'Ὥ' + combining -> 'ᾭ' + {{0x1F6E, 0x0345}, 0x1FAE}, // 'Ὦ' + combining -> 'ᾮ' + {{0x1F6F, 0x0345}, 0x1FAF}, // 'Ὧ' + combining -> 'ᾯ' + {{0x1F70, 0x0345}, 0x1FB2}, // 'ὰ' + combining -> 'ᾲ' + {{0x1F74, 0x0345}, 0x1FC2}, // 'ὴ' + combining -> 'ῂ' + {{0x1F7C, 0x0345}, 0x1FF2}, // 'ὼ' + combining -> 'ῲ' + {{0x1FB6, 0x0345}, 0x1FB7}, // 'ᾶ' + combining -> 'ᾷ' + {{0x1FBF, 0x0300}, 0x1FCD}, // '᾿' + combining -> '῍' + {{0x1FBF, 0x0301}, 0x1FCE}, // '᾿' + combining -> '῎' + {{0x1FBF, 0x0342}, 0x1FCF}, // '᾿' + combining -> '῏' + {{0x1FC6, 0x0345}, 0x1FC7}, // 'ῆ' + combining -> 'ῇ' + {{0x1FF6, 0x0345}, 0x1FF7}, // 'ῶ' + combining -> 'ῷ' + {{0x1FFE, 0x0300}, 0x1FDD}, // '῾' + combining -> '῝' + {{0x1FFE, 0x0301}, 0x1FDE}, // '῾' + combining -> '῞' + {{0x1FFE, 0x0342}, 0x1FDF}, // '῾' + combining -> '῟' + {{0x2190, 0x0338}, 0x219A}, // '←' + combining -> '↚' + {{0x2192, 0x0338}, 0x219B}, // '→' + combining -> '↛' + {{0x2194, 0x0338}, 0x21AE}, // '↔' + combining -> '↮' + {{0x21D0, 0x0338}, 0x21CD}, // '⇐' + combining -> '⇍' + {{0x21D2, 0x0338}, 0x21CF}, // '⇒' + combining -> '⇏' + {{0x21D4, 0x0338}, 0x21CE}, // '⇔' + combining -> '⇎' + {{0x2203, 0x0338}, 0x2204}, // '∃' + combining -> '∄' + {{0x2208, 0x0338}, 0x2209}, // '∈' + combining -> '∉' + {{0x220B, 0x0338}, 0x220C}, // '∋' + combining -> '∌' + {{0x2223, 0x0338}, 0x2224}, // '∣' + combining -> '∤' + {{0x2225, 0x0338}, 0x2226}, // '∥' + combining -> '∦' + {{0x223C, 0x0338}, 0x2241}, // '∼' + combining -> '≁' + {{0x2243, 0x0338}, 0x2244}, // '≃' + combining -> '≄' + {{0x2245, 0x0338}, 0x2247}, // '≅' + combining -> '≇' + {{0x2248, 0x0338}, 0x2249}, // '≈' + combining -> '≉' + {{0x224D, 0x0338}, 0x226D}, // '≍' + combining -> '≭' + {{0x2261, 0x0338}, 0x2262}, // '≡' + combining -> '≢' + {{0x2264, 0x0338}, 0x2270}, // '≤' + combining -> '≰' + {{0x2265, 0x0338}, 0x2271}, // '≥' + combining -> '≱' + {{0x2272, 0x0338}, 0x2274}, // '≲' + combining -> '≴' + {{0x2273, 0x0338}, 0x2275}, // '≳' + combining -> '≵' + {{0x2276, 0x0338}, 0x2278}, // '≶' + combining -> '≸' + {{0x2277, 0x0338}, 0x2279}, // '≷' + combining -> '≹' + {{0x227A, 0x0338}, 0x2280}, // '≺' + combining -> '⊀' + {{0x227B, 0x0338}, 0x2281}, // '≻' + combining -> '⊁' + {{0x227C, 0x0338}, 0x22E0}, // '≼' + combining -> '⋠' + {{0x227D, 0x0338}, 0x22E1}, // '≽' + combining -> '⋡' + {{0x2282, 0x0338}, 0x2284}, // '⊂' + combining -> '⊄' + {{0x2283, 0x0338}, 0x2285}, // '⊃' + combining -> '⊅' + {{0x2286, 0x0338}, 0x2288}, // '⊆' + combining -> '⊈' + {{0x2287, 0x0338}, 0x2289}, // '⊇' + combining -> '⊉' + {{0x2291, 0x0338}, 0x22E2}, // '⊑' + combining -> '⋢' + {{0x2292, 0x0338}, 0x22E3}, // '⊒' + combining -> '⋣' + {{0x22A2, 0x0338}, 0x22AC}, // '⊢' + combining -> '⊬' + {{0x22A8, 0x0338}, 0x22AD}, // '⊨' + combining -> '⊭' + {{0x22A9, 0x0338}, 0x22AE}, // '⊩' + combining -> '⊮' + {{0x22AB, 0x0338}, 0x22AF}, // '⊫' + combining -> '⊯' + {{0x22B2, 0x0338}, 0x22EA}, // '⊲' + combining -> '⋪' + {{0x22B3, 0x0338}, 0x22EB}, // '⊳' + combining -> '⋫' + {{0x22B4, 0x0338}, 0x22EC}, // '⊴' + combining -> '⋬' + {{0x22B5, 0x0338}, 0x22ED}, // '⊵' + combining -> '⋭' + {{0x2ADD, 0x0338}, 0x2ADC}, // '⫝' + combining -> '⫝̸' + {{0x3046, 0x3099}, 0x3094}, // 'う' + combining -> 'ゔ' + {{0x304B, 0x3099}, 0x304C}, // 'か' + combining -> 'が' + {{0x304D, 0x3099}, 0x304E}, // 'き' + combining -> 'ぎ' + {{0x304F, 0x3099}, 0x3050}, // 'く' + combining -> 'ぐ' + {{0x3051, 0x3099}, 0x3052}, // 'け' + combining -> 'げ' + {{0x3053, 0x3099}, 0x3054}, // 'こ' + combining -> 'ご' + {{0x3055, 0x3099}, 0x3056}, // 'さ' + combining -> 'ざ' + {{0x3057, 0x3099}, 0x3058}, // 'し' + combining -> 'じ' + {{0x3059, 0x3099}, 0x305A}, // 'す' + combining -> 'ず' + {{0x305B, 0x3099}, 0x305C}, // 'せ' + combining -> 'ぜ' + {{0x305D, 0x3099}, 0x305E}, // 'そ' + combining -> 'ぞ' + {{0x305F, 0x3099}, 0x3060}, // 'た' + combining -> 'だ' + {{0x3061, 0x3099}, 0x3062}, // 'ち' + combining -> 'ぢ' + {{0x3064, 0x3099}, 0x3065}, // 'つ' + combining -> 'づ' + {{0x3066, 0x3099}, 0x3067}, // 'て' + combining -> 'で' + {{0x3068, 0x3099}, 0x3069}, // 'と' + combining -> 'ど' + {{0x306F, 0x3099}, 0x3070}, // 'は' + combining -> 'ば' + {{0x306F, 0x309A}, 0x3071}, // 'は' + combining -> 'ぱ' + {{0x3072, 0x3099}, 0x3073}, // 'ひ' + combining -> 'び' + {{0x3072, 0x309A}, 0x3074}, // 'ひ' + combining -> 'ぴ' + {{0x3075, 0x3099}, 0x3076}, // 'ふ' + combining -> 'ぶ' + {{0x3075, 0x309A}, 0x3077}, // 'ふ' + combining -> 'ぷ' + {{0x3078, 0x3099}, 0x3079}, // 'へ' + combining -> 'べ' + {{0x3078, 0x309A}, 0x307A}, // 'へ' + combining -> 'ぺ' + {{0x307B, 0x3099}, 0x307C}, // 'ほ' + combining -> 'ぼ' + {{0x307B, 0x309A}, 0x307D}, // 'ほ' + combining -> 'ぽ' + {{0x309D, 0x3099}, 0x309E}, // 'ゝ' + combining -> 'ゞ' + {{0x30A6, 0x3099}, 0x30F4}, // 'ウ' + combining -> 'ヴ' + {{0x30AB, 0x3099}, 0x30AC}, // 'カ' + combining -> 'ガ' + {{0x30AD, 0x3099}, 0x30AE}, // 'キ' + combining -> 'ギ' + {{0x30AF, 0x3099}, 0x30B0}, // 'ク' + combining -> 'グ' + {{0x30B1, 0x3099}, 0x30B2}, // 'ケ' + combining -> 'ゲ' + {{0x30B3, 0x3099}, 0x30B4}, // 'コ' + combining -> 'ゴ' + {{0x30B5, 0x3099}, 0x30B6}, // 'サ' + combining -> 'ザ' + {{0x30B7, 0x3099}, 0x30B8}, // 'シ' + combining -> 'ジ' + {{0x30B9, 0x3099}, 0x30BA}, // 'ス' + combining -> 'ズ' + {{0x30BB, 0x3099}, 0x30BC}, // 'セ' + combining -> 'ゼ' + {{0x30BD, 0x3099}, 0x30BE}, // 'ソ' + combining -> 'ゾ' + {{0x30BF, 0x3099}, 0x30C0}, // 'タ' + combining -> 'ダ' + {{0x30C1, 0x3099}, 0x30C2}, // 'チ' + combining -> 'ヂ' + {{0x30C4, 0x3099}, 0x30C5}, // 'ツ' + combining -> 'ヅ' + {{0x30C6, 0x3099}, 0x30C7}, // 'テ' + combining -> 'デ' + {{0x30C8, 0x3099}, 0x30C9}, // 'ト' + combining -> 'ド' + {{0x30CF, 0x3099}, 0x30D0}, // 'ハ' + combining -> 'バ' + {{0x30CF, 0x309A}, 0x30D1}, // 'ハ' + combining -> 'パ' + {{0x30D2, 0x3099}, 0x30D3}, // 'ヒ' + combining -> 'ビ' + {{0x30D2, 0x309A}, 0x30D4}, // 'ヒ' + combining -> 'ピ' + {{0x30D5, 0x3099}, 0x30D6}, // 'フ' + combining -> 'ブ' + {{0x30D5, 0x309A}, 0x30D7}, // 'フ' + combining -> 'プ' + {{0x30D8, 0x3099}, 0x30D9}, // 'ヘ' + combining -> 'ベ' + {{0x30D8, 0x309A}, 0x30DA}, // 'ヘ' + combining -> 'ペ' + {{0x30DB, 0x3099}, 0x30DC}, // 'ホ' + combining -> 'ボ' + {{0x30DB, 0x309A}, 0x30DD}, // 'ホ' + combining -> 'ポ' + {{0x30EF, 0x3099}, 0x30F7}, // 'ワ' + combining -> 'ヷ' + {{0x30F0, 0x3099}, 0x30F8}, // 'ヰ' + combining -> 'ヸ' + {{0x30F1, 0x3099}, 0x30F9}, // 'ヱ' + combining -> 'ヹ' + {{0x30F2, 0x3099}, 0x30FA}, // 'ヲ' + combining -> 'ヺ' + {{0x30FD, 0x3099}, 0x30FE}, // 'ヽ' + combining -> 'ヾ' + {{0xFB49, 0x05C1}, 0xFB2C}, // 'שּ' + combining -> 'שּׁ' + {{0xFB49, 0x05C2}, 0xFB2D}, // 'שּ' + combining -> 'שּׂ' + {{0x105D2, 0x0307}, 0x105C9}, + {{0x105DA, 0x0307}, 0x105E4}, + {{0x11099, 0x110BA}, 0x1109A}, // '𑂙' + combining -> '𑂚' + {{0x1109B, 0x110BA}, 0x1109C}, // '𑂛' + combining -> '𑂜' + {{0x110A5, 0x110BA}, 0x110AB}, // '𑂥' + combining -> '𑂫' + {{0x11131, 0x11127}, 0x1112E}, // '𑄱' + combining -> '𑄮' + {{0x11132, 0x11127}, 0x1112F}, // '𑄲' + combining -> '𑄯' + {{0x11347, 0x1133E}, 0x1134B}, // '𑍇' + combining -> '𑍋' + {{0x11347, 0x11357}, 0x1134C}, // '𑍇' + combining -> '𑍌' + {{0x11382, 0x113C9}, 0x11383}, + {{0x11384, 0x113BB}, 0x11385}, + {{0x1138B, 0x113C2}, 0x1138E}, + {{0x11390, 0x113C9}, 0x11391}, + {{0x113C2, 0x113B8}, 0x113C7}, + {{0x113C2, 0x113C2}, 0x113C5}, + {{0x113C2, 0x113C9}, 0x113C8}, + {{0x114B9, 0x114B0}, 0x114BC}, // '𑒹' + combining -> '𑒼' + {{0x114B9, 0x114BA}, 0x114BB}, // '𑒹' + combining -> '𑒻' + {{0x114B9, 0x114BD}, 0x114BE}, // '𑒹' + combining -> '𑒾' + {{0x115B8, 0x115AF}, 0x115BA}, // '𑖸' + combining -> '𑖺' + {{0x115B9, 0x115AF}, 0x115BB}, // '𑖹' + combining -> '𑖻' + {{0x11935, 0x11930}, 0x11938}, // '𑤵' + combining -> '𑤸' + {{0x1611E, 0x1611E}, 0x16121}, + {{0x1611E, 0x1611F}, 0x16123}, + {{0x1611E, 0x16120}, 0x16125}, + {{0x1611E, 0x16129}, 0x16122}, + {{0x16121, 0x1611F}, 0x16126}, + {{0x16121, 0x16120}, 0x16128}, + {{0x16122, 0x1611F}, 0x16127}, + {{0x16129, 0x1611F}, 0x16124}, + {{0x16D63, 0x16D67}, 0x16D69}, + {{0x16D67, 0x16D67}, 0x16D68}, + {{0x16D69, 0x16D67}, 0x16D6A}, + {{0x1D157, 0x1D165}, 0x1D15E}, // '𝅗' + combining -> '𝅗𝅥' + {{0x1D158, 0x1D165}, 0x1D15F}, // '𝅘' + combining -> '𝅘𝅥' + {{0x1D15F, 0x1D16E}, 0x1D160}, // '𝅘𝅥' + combining -> '𝅘𝅥𝅮' + {{0x1D15F, 0x1D16F}, 0x1D161}, // '𝅘𝅥' + combining -> '𝅘𝅥𝅯' + {{0x1D15F, 0x1D170}, 0x1D162}, // '𝅘𝅥' + combining -> '𝅘𝅥𝅰' + {{0x1D15F, 0x1D171}, 0x1D163}, // '𝅘𝅥' + combining -> '𝅘𝅥𝅱' + {{0x1D15F, 0x1D172}, 0x1D164}, // '𝅘𝅥' + combining -> '𝅘𝅥𝅲' + {{0x1D1B9, 0x1D165}, 0x1D1BB}, // '𝆹' + combining -> '𝆹𝅥' + {{0x1D1BA, 0x1D165}, 0x1D1BC}, // '𝆺' + combining -> '𝆺𝅥' + {{0x1D1BB, 0x1D16E}, 0x1D1BD}, // '𝆹𝅥' + combining -> '𝆹𝅥𝅮' + {{0x1D1BB, 0x1D16F}, 0x1D1BF}, // '𝆹𝅥' + combining -> '𝆹𝅥𝅯' + {{0x1D1BC, 0x1D16E}, 0x1D1BE}, // '𝆺𝅥' + combining -> '𝆺𝅥𝅮' + {{0x1D1BC, 0x1D16F}, 0x1D1C0}, // '𝆺𝅥' + combining -> '𝆺𝅥𝅯' }; // Canonical combining class table: codepoint -> combining_class (0-254) -// Only non-zero values are stored; missing entries have combining class 0 -// (starter) +// Only non-zero values are stored; missing entries have combining class 0 (starter) static const std::unordered_map combining_class_table = { {0x0300, 230}, // '̀' cc=230 {0x0301, 230}, // '́' cc=230 @@ -3219,11 +3214,11 @@ static const std::unordered_map combining_class_table = { {0x0331, 220}, // '̱' cc=220 {0x0332, 220}, // '̲' cc=220 {0x0333, 220}, // '̳' cc=220 - {0x0334, 1}, // '̴' cc=1 - {0x0335, 1}, // '̵' cc=1 - {0x0336, 1}, // '̶' cc=1 - {0x0337, 1}, // '̷' cc=1 - {0x0338, 1}, // '̸' cc=1 + {0x0334, 1}, // '̴' cc=1 + {0x0335, 1}, // '̵' cc=1 + {0x0336, 1}, // '̶' cc=1 + {0x0337, 1}, // '̷' cc=1 + {0x0338, 1}, // '̸' cc=1 {0x0339, 220}, // '̹' cc=220 {0x033A, 220}, // '̺' cc=220 {0x033B, 220}, // '̻' cc=220 @@ -3314,26 +3309,26 @@ static const std::unordered_map combining_class_table = { {0x05AD, 222}, // '֭' cc=222 {0x05AE, 228}, // '֮' cc=228 {0x05AF, 230}, // '֯' cc=230 - {0x05B0, 10}, // 'ְ' cc=10 - {0x05B1, 11}, // 'ֱ' cc=11 - {0x05B2, 12}, // 'ֲ' cc=12 - {0x05B3, 13}, // 'ֳ' cc=13 - {0x05B4, 14}, // 'ִ' cc=14 - {0x05B5, 15}, // 'ֵ' cc=15 - {0x05B6, 16}, // 'ֶ' cc=16 - {0x05B7, 17}, // 'ַ' cc=17 - {0x05B8, 18}, // 'ָ' cc=18 - {0x05B9, 19}, // 'ֹ' cc=19 - {0x05BA, 19}, // 'ֺ' cc=19 - {0x05BB, 20}, // 'ֻ' cc=20 - {0x05BC, 21}, // 'ּ' cc=21 - {0x05BD, 22}, // 'ֽ' cc=22 - {0x05BF, 23}, // 'ֿ' cc=23 - {0x05C1, 24}, // 'ׁ' cc=24 - {0x05C2, 25}, // 'ׂ' cc=25 + {0x05B0, 10}, // 'ְ' cc=10 + {0x05B1, 11}, // 'ֱ' cc=11 + {0x05B2, 12}, // 'ֲ' cc=12 + {0x05B3, 13}, // 'ֳ' cc=13 + {0x05B4, 14}, // 'ִ' cc=14 + {0x05B5, 15}, // 'ֵ' cc=15 + {0x05B6, 16}, // 'ֶ' cc=16 + {0x05B7, 17}, // 'ַ' cc=17 + {0x05B8, 18}, // 'ָ' cc=18 + {0x05B9, 19}, // 'ֹ' cc=19 + {0x05BA, 19}, // 'ֺ' cc=19 + {0x05BB, 20}, // 'ֻ' cc=20 + {0x05BC, 21}, // 'ּ' cc=21 + {0x05BD, 22}, // 'ֽ' cc=22 + {0x05BF, 23}, // 'ֿ' cc=23 + {0x05C1, 24}, // 'ׁ' cc=24 + {0x05C2, 25}, // 'ׂ' cc=25 {0x05C4, 230}, // 'ׄ' cc=230 {0x05C5, 220}, // 'ׅ' cc=220 - {0x05C7, 18}, // 'ׇ' cc=18 + {0x05C7, 18}, // 'ׇ' cc=18 {0x0610, 230}, // 'ؐ' cc=230 {0x0611, 230}, // 'ؑ' cc=230 {0x0612, 230}, // 'ؒ' cc=230 @@ -3342,17 +3337,17 @@ static const std::unordered_map combining_class_table = { {0x0615, 230}, // 'ؕ' cc=230 {0x0616, 230}, // 'ؖ' cc=230 {0x0617, 230}, // 'ؗ' cc=230 - {0x0618, 30}, // 'ؘ' cc=30 - {0x0619, 31}, // 'ؙ' cc=31 - {0x061A, 32}, // 'ؚ' cc=32 - {0x064B, 27}, // 'ً' cc=27 - {0x064C, 28}, // 'ٌ' cc=28 - {0x064D, 29}, // 'ٍ' cc=29 - {0x064E, 30}, // 'َ' cc=30 - {0x064F, 31}, // 'ُ' cc=31 - {0x0650, 32}, // 'ِ' cc=32 - {0x0651, 33}, // 'ّ' cc=33 - {0x0652, 34}, // 'ْ' cc=34 + {0x0618, 30}, // 'ؘ' cc=30 + {0x0619, 31}, // 'ؙ' cc=31 + {0x061A, 32}, // 'ؚ' cc=32 + {0x064B, 27}, // 'ً' cc=27 + {0x064C, 28}, // 'ٌ' cc=28 + {0x064D, 29}, // 'ٍ' cc=29 + {0x064E, 30}, // 'َ' cc=30 + {0x064F, 31}, // 'ُ' cc=31 + {0x0650, 32}, // 'ِ' cc=32 + {0x0651, 33}, // 'ّ' cc=33 + {0x0652, 34}, // 'ْ' cc=34 {0x0653, 230}, // 'ٓ' cc=230 {0x0654, 230}, // 'ٔ' cc=230 {0x0655, 220}, // 'ٕ' cc=220 @@ -3366,7 +3361,7 @@ static const std::unordered_map combining_class_table = { {0x065D, 230}, // 'ٝ' cc=230 {0x065E, 230}, // 'ٞ' cc=230 {0x065F, 220}, // 'ٟ' cc=220 - {0x0670, 35}, // 'ٰ' cc=35 + {0x0670, 35}, // 'ٰ' cc=35 {0x06D6, 230}, // 'ۖ' cc=230 {0x06D7, 230}, // 'ۗ' cc=230 {0x06D8, 230}, // 'ۘ' cc=230 @@ -3386,7 +3381,7 @@ static const std::unordered_map combining_class_table = { {0x06EB, 230}, // '۫' cc=230 {0x06EC, 230}, // '۬' cc=230 {0x06ED, 220}, // 'ۭ' cc=220 - {0x0711, 36}, // 'ܑ' cc=36 + {0x0711, 36}, // 'ܑ' cc=36 {0x0730, 230}, // 'ܰ' cc=230 {0x0731, 220}, // 'ܱ' cc=220 {0x0732, 230}, // 'ܲ' cc=230 @@ -3494,9 +3489,9 @@ static const std::unordered_map combining_class_table = { {0x08ED, 220}, // '࣭' cc=220 {0x08EE, 220}, // '࣮' cc=220 {0x08EF, 220}, // '࣯' cc=220 - {0x08F0, 27}, // 'ࣰ' cc=27 - {0x08F1, 28}, // 'ࣱ' cc=28 - {0x08F2, 29}, // 'ࣲ' cc=29 + {0x08F0, 27}, // 'ࣰ' cc=27 + {0x08F1, 28}, // 'ࣱ' cc=28 + {0x08F2, 29}, // 'ࣲ' cc=29 {0x08F3, 230}, // 'ࣳ' cc=230 {0x08F4, 230}, // 'ࣴ' cc=230 {0x08F5, 230}, // 'ࣵ' cc=230 @@ -3510,42 +3505,42 @@ static const std::unordered_map combining_class_table = { {0x08FD, 230}, // 'ࣽ' cc=230 {0x08FE, 230}, // 'ࣾ' cc=230 {0x08FF, 230}, // 'ࣿ' cc=230 - {0x093C, 7}, // '़' cc=7 - {0x094D, 9}, // '्' cc=9 + {0x093C, 7}, // '़' cc=7 + {0x094D, 9}, // '्' cc=9 {0x0951, 230}, // '॑' cc=230 {0x0952, 220}, // '॒' cc=220 {0x0953, 230}, // '॓' cc=230 {0x0954, 230}, // '॔' cc=230 - {0x09BC, 7}, // '়' cc=7 - {0x09CD, 9}, // '্' cc=9 + {0x09BC, 7}, // '়' cc=7 + {0x09CD, 9}, // '্' cc=9 {0x09FE, 230}, // '৾' cc=230 - {0x0A3C, 7}, // '਼' cc=7 - {0x0A4D, 9}, // '੍' cc=9 - {0x0ABC, 7}, // '઼' cc=7 - {0x0ACD, 9}, // '્' cc=9 - {0x0B3C, 7}, // '଼' cc=7 - {0x0B4D, 9}, // '୍' cc=9 - {0x0BCD, 9}, // '்' cc=9 - {0x0C3C, 7}, // cc=7 - {0x0C4D, 9}, // '్' cc=9 - {0x0C55, 84}, // 'ౕ' cc=84 - {0x0C56, 91}, // 'ౖ' cc=91 - {0x0CBC, 7}, // '಼' cc=7 - {0x0CCD, 9}, // '್' cc=9 - {0x0D3B, 9}, // '഻' cc=9 - {0x0D3C, 9}, // '഼' cc=9 - {0x0D4D, 9}, // '്' cc=9 - {0x0DCA, 9}, // '්' cc=9 + {0x0A3C, 7}, // '਼' cc=7 + {0x0A4D, 9}, // '੍' cc=9 + {0x0ABC, 7}, // '઼' cc=7 + {0x0ACD, 9}, // '્' cc=9 + {0x0B3C, 7}, // '଼' cc=7 + {0x0B4D, 9}, // '୍' cc=9 + {0x0BCD, 9}, // '்' cc=9 + {0x0C3C, 7}, // cc=7 + {0x0C4D, 9}, // '్' cc=9 + {0x0C55, 84}, // 'ౕ' cc=84 + {0x0C56, 91}, // 'ౖ' cc=91 + {0x0CBC, 7}, // '಼' cc=7 + {0x0CCD, 9}, // '್' cc=9 + {0x0D3B, 9}, // '഻' cc=9 + {0x0D3C, 9}, // '഼' cc=9 + {0x0D4D, 9}, // '്' cc=9 + {0x0DCA, 9}, // '්' cc=9 {0x0E38, 103}, // 'ุ' cc=103 {0x0E39, 103}, // 'ู' cc=103 - {0x0E3A, 9}, // 'ฺ' cc=9 + {0x0E3A, 9}, // 'ฺ' cc=9 {0x0E48, 107}, // '่' cc=107 {0x0E49, 107}, // '้' cc=107 {0x0E4A, 107}, // '๊' cc=107 {0x0E4B, 107}, // '๋' cc=107 {0x0EB8, 118}, // 'ຸ' cc=118 {0x0EB9, 118}, // 'ູ' cc=118 - {0x0EBA, 9}, // '຺' cc=9 + {0x0EBA, 9}, // '຺' cc=9 {0x0EC8, 122}, // '່' cc=122 {0x0EC9, 122}, // '້' cc=122 {0x0ECA, 122}, // '໊' cc=122 @@ -3565,21 +3560,21 @@ static const std::unordered_map combining_class_table = { {0x0F80, 130}, // 'ྀ' cc=130 {0x0F82, 230}, // 'ྂ' cc=230 {0x0F83, 230}, // 'ྃ' cc=230 - {0x0F84, 9}, // '྄' cc=9 + {0x0F84, 9}, // '྄' cc=9 {0x0F86, 230}, // '྆' cc=230 {0x0F87, 230}, // '྇' cc=230 {0x0FC6, 220}, // '࿆' cc=220 - {0x1037, 7}, // '့' cc=7 - {0x1039, 9}, // '္' cc=9 - {0x103A, 9}, // '်' cc=9 + {0x1037, 7}, // '့' cc=7 + {0x1039, 9}, // '္' cc=9 + {0x103A, 9}, // '်' cc=9 {0x108D, 220}, // 'ႍ' cc=220 {0x135D, 230}, // '፝' cc=230 {0x135E, 230}, // '፞' cc=230 {0x135F, 230}, // '፟' cc=230 - {0x1714, 9}, // '᜔' cc=9 - {0x1715, 9}, // cc=9 - {0x1734, 9}, // '᜴' cc=9 - {0x17D2, 9}, // '្' cc=9 + {0x1714, 9}, // '᜔' cc=9 + {0x1715, 9}, // cc=9 + {0x1734, 9}, // '᜴' cc=9 + {0x17D2, 9}, // '្' cc=9 {0x17DD, 230}, // '៝' cc=230 {0x18A9, 228}, // 'ᢩ' cc=228 {0x1939, 222}, // '᤹' cc=222 @@ -3587,7 +3582,7 @@ static const std::unordered_map combining_class_table = { {0x193B, 220}, // '᤻' cc=220 {0x1A17, 230}, // 'ᨗ' cc=230 {0x1A18, 220}, // 'ᨘ' cc=220 - {0x1A60, 9}, // '᩠' cc=9 + {0x1A60, 9}, // '᩠' cc=9 {0x1A75, 230}, // '᩵' cc=230 {0x1A76, 230}, // '᩶' cc=230 {0x1A77, 230}, // '᩷' cc=230 @@ -3654,8 +3649,8 @@ static const std::unordered_map combining_class_table = { {0x1AE9, 230}, // cc=230 {0x1AEA, 230}, // cc=230 {0x1AEB, 234}, // cc=234 - {0x1B34, 7}, // '᬴' cc=7 - {0x1B44, 9}, // '᭄' cc=9 + {0x1B34, 7}, // '᬴' cc=7 + {0x1B44, 9}, // '᭄' cc=9 {0x1B6B, 230}, // '᭫' cc=230 {0x1B6C, 220}, // '᭬' cc=220 {0x1B6D, 230}, // '᭭' cc=230 @@ -3665,16 +3660,16 @@ static const std::unordered_map combining_class_table = { {0x1B71, 230}, // '᭱' cc=230 {0x1B72, 230}, // '᭲' cc=230 {0x1B73, 230}, // '᭳' cc=230 - {0x1BAA, 9}, // '᮪' cc=9 - {0x1BAB, 9}, // '᮫' cc=9 - {0x1BE6, 7}, // '᯦' cc=7 - {0x1BF2, 9}, // '᯲' cc=9 - {0x1BF3, 9}, // '᯳' cc=9 - {0x1C37, 7}, // '᰷' cc=7 + {0x1BAA, 9}, // '᮪' cc=9 + {0x1BAB, 9}, // '᮫' cc=9 + {0x1BE6, 7}, // '᯦' cc=7 + {0x1BF2, 9}, // '᯲' cc=9 + {0x1BF3, 9}, // '᯳' cc=9 + {0x1C37, 7}, // '᰷' cc=7 {0x1CD0, 230}, // '᳐' cc=230 {0x1CD1, 230}, // '᳑' cc=230 {0x1CD2, 230}, // '᳒' cc=230 - {0x1CD4, 1}, // '᳔' cc=1 + {0x1CD4, 1}, // '᳔' cc=1 {0x1CD5, 220}, // '᳕' cc=220 {0x1CD6, 220}, // '᳖' cc=220 {0x1CD7, 220}, // '᳗' cc=220 @@ -3687,13 +3682,13 @@ static const std::unordered_map combining_class_table = { {0x1CDE, 220}, // '᳞' cc=220 {0x1CDF, 220}, // '᳟' cc=220 {0x1CE0, 230}, // '᳠' cc=230 - {0x1CE2, 1}, // '᳢' cc=1 - {0x1CE3, 1}, // '᳣' cc=1 - {0x1CE4, 1}, // '᳤' cc=1 - {0x1CE5, 1}, // '᳥' cc=1 - {0x1CE6, 1}, // '᳦' cc=1 - {0x1CE7, 1}, // '᳧' cc=1 - {0x1CE8, 1}, // '᳨' cc=1 + {0x1CE2, 1}, // '᳢' cc=1 + {0x1CE3, 1}, // '᳣' cc=1 + {0x1CE4, 1}, // '᳤' cc=1 + {0x1CE5, 1}, // '᳥' cc=1 + {0x1CE6, 1}, // '᳦' cc=1 + {0x1CE7, 1}, // '᳧' cc=1 + {0x1CE8, 1}, // '᳨' cc=1 {0x1CED, 220}, // '᳭' cc=220 {0x1CF4, 230}, // '᳴' cc=230 {0x1CF8, 230}, // '᳸' cc=230 @@ -3764,25 +3759,25 @@ static const std::unordered_map combining_class_table = { {0x1DFF, 220}, // '᷿' cc=220 {0x20D0, 230}, // '⃐' cc=230 {0x20D1, 230}, // '⃑' cc=230 - {0x20D2, 1}, // '⃒' cc=1 - {0x20D3, 1}, // '⃓' cc=1 + {0x20D2, 1}, // '⃒' cc=1 + {0x20D3, 1}, // '⃓' cc=1 {0x20D4, 230}, // '⃔' cc=230 {0x20D5, 230}, // '⃕' cc=230 {0x20D6, 230}, // '⃖' cc=230 {0x20D7, 230}, // '⃗' cc=230 - {0x20D8, 1}, // '⃘' cc=1 - {0x20D9, 1}, // '⃙' cc=1 - {0x20DA, 1}, // '⃚' cc=1 + {0x20D8, 1}, // '⃘' cc=1 + {0x20D9, 1}, // '⃙' cc=1 + {0x20DA, 1}, // '⃚' cc=1 {0x20DB, 230}, // '⃛' cc=230 {0x20DC, 230}, // '⃜' cc=230 {0x20E1, 230}, // '⃡' cc=230 - {0x20E5, 1}, // '⃥' cc=1 - {0x20E6, 1}, // '⃦' cc=1 + {0x20E5, 1}, // '⃥' cc=1 + {0x20E6, 1}, // '⃦' cc=1 {0x20E7, 230}, // '⃧' cc=230 {0x20E8, 220}, // '⃨' cc=220 {0x20E9, 230}, // '⃩' cc=230 - {0x20EA, 1}, // '⃪' cc=1 - {0x20EB, 1}, // '⃫' cc=1 + {0x20EA, 1}, // '⃪' cc=1 + {0x20EB, 1}, // '⃫' cc=1 {0x20EC, 220}, // '⃬' cc=220 {0x20ED, 220}, // '⃭' cc=220 {0x20EE, 220}, // '⃮' cc=220 @@ -3791,7 +3786,7 @@ static const std::unordered_map combining_class_table = { {0x2CEF, 230}, // '⳯' cc=230 {0x2CF0, 230}, // '⳰' cc=230 {0x2CF1, 230}, // '⳱' cc=230 - {0x2D7F, 9}, // '⵿' cc=9 + {0x2D7F, 9}, // '⵿' cc=9 {0x2DE0, 230}, // 'ⷠ' cc=230 {0x2DE1, 230}, // 'ⷡ' cc=230 {0x2DE2, 230}, // 'ⷢ' cc=230 @@ -3830,8 +3825,8 @@ static const std::unordered_map combining_class_table = { {0x302D, 222}, // '〭' cc=222 {0x302E, 224}, // '〮' cc=224 {0x302F, 224}, // '〯' cc=224 - {0x3099, 8}, // '゙' cc=8 - {0x309A, 8}, // '゚' cc=8 + {0x3099, 8}, // '゙' cc=8 + {0x309A, 8}, // '゚' cc=8 {0xA66F, 230}, // '꙯' cc=230 {0xA674, 230}, // 'ꙴ' cc=230 {0xA675, 230}, // 'ꙵ' cc=230 @@ -3847,9 +3842,9 @@ static const std::unordered_map combining_class_table = { {0xA69F, 230}, // 'ꚟ' cc=230 {0xA6F0, 230}, // '꛰' cc=230 {0xA6F1, 230}, // '꛱' cc=230 - {0xA806, 9}, // '꠆' cc=9 - {0xA82C, 9}, // '꠬' cc=9 - {0xA8C4, 9}, // '꣄' cc=9 + {0xA806, 9}, // '꠆' cc=9 + {0xA82C, 9}, // '꠬' cc=9 + {0xA8C4, 9}, // '꣄' cc=9 {0xA8E0, 230}, // '꣠' cc=230 {0xA8E1, 230}, // '꣡' cc=230 {0xA8E2, 230}, // '꣢' cc=230 @@ -3871,9 +3866,9 @@ static const std::unordered_map combining_class_table = { {0xA92B, 220}, // '꤫' cc=220 {0xA92C, 220}, // '꤬' cc=220 {0xA92D, 220}, // '꤭' cc=220 - {0xA953, 9}, // '꥓' cc=9 - {0xA9B3, 7}, // '꦳' cc=7 - {0xA9C0, 9}, // '꧀' cc=9 + {0xA953, 9}, // '꥓' cc=9 + {0xA9B3, 7}, // '꦳' cc=7 + {0xA9C0, 9}, // '꧀' cc=9 {0xAAB0, 230}, // 'ꪰ' cc=230 {0xAAB2, 230}, // 'ꪲ' cc=230 {0xAAB3, 230}, // 'ꪳ' cc=230 @@ -3883,9 +3878,9 @@ static const std::unordered_map combining_class_table = { {0xAABE, 230}, // 'ꪾ' cc=230 {0xAABF, 230}, // '꪿' cc=230 {0xAAC1, 230}, // '꫁' cc=230 - {0xAAF6, 9}, // '꫶' cc=9 - {0xABED, 9}, // '꯭' cc=9 - {0xFB1E, 26}, // 'ﬞ' cc=26 + {0xAAF6, 9}, // '꫶' cc=9 + {0xABED, 9}, // '꯭' cc=9 + {0xFB1E, 26}, // 'ﬞ' cc=26 {0xFE20, 230}, // '︠' cc=230 {0xFE21, 230}, // '︡' cc=230 {0xFE22, 230}, // '︢' cc=230 @@ -3902,239 +3897,239 @@ static const std::unordered_map combining_class_table = { {0xFE2D, 220}, // '︭' cc=220 {0xFE2E, 230}, // '︮' cc=230 {0xFE2F, 230}, // '︯' cc=230 - {0x101FD, 220}, // '𐇽' cc=220 - {0x102E0, 220}, // '𐋠' cc=220 - {0x10376, 230}, // '𐍶' cc=230 - {0x10377, 230}, // '𐍷' cc=230 - {0x10378, 230}, // '𐍸' cc=230 - {0x10379, 230}, // '𐍹' cc=230 - {0x1037A, 230}, // '𐍺' cc=230 - {0x10A0D, 220}, // '𐨍' cc=220 - {0x10A0F, 230}, // '𐨏' cc=230 - {0x10A38, 230}, // '𐨸' cc=230 - {0x10A39, 1}, // '𐨹' cc=1 - {0x10A3A, 220}, // '𐨺' cc=220 - {0x10A3F, 9}, // '𐨿' cc=9 - {0x10AE5, 230}, // '𐫥' cc=230 - {0x10AE6, 220}, // '𐫦' cc=220 - {0x10D24, 230}, // '𐴤' cc=230 - {0x10D25, 230}, // '𐴥' cc=230 - {0x10D26, 230}, // '𐴦' cc=230 - {0x10D27, 230}, // '𐴧' cc=230 - {0x10D69, 230}, // cc=230 - {0x10D6A, 230}, // cc=230 - {0x10D6B, 230}, // cc=230 - {0x10D6C, 230}, // cc=230 - {0x10D6D, 230}, // cc=230 - {0x10EAB, 230}, // '𐺫' cc=230 - {0x10EAC, 230}, // '𐺬' cc=230 - {0x10EFA, 220}, // cc=220 - {0x10EFB, 220}, // cc=220 - {0x10EFD, 220}, // cc=220 - {0x10EFE, 220}, // cc=220 - {0x10EFF, 220}, // cc=220 - {0x10F46, 220}, // '𐽆' cc=220 - {0x10F47, 220}, // '𐽇' cc=220 - {0x10F48, 230}, // '𐽈' cc=230 - {0x10F49, 230}, // '𐽉' cc=230 - {0x10F4A, 230}, // '𐽊' cc=230 - {0x10F4B, 220}, // '𐽋' cc=220 - {0x10F4C, 230}, // '𐽌' cc=230 - {0x10F4D, 220}, // '𐽍' cc=220 - {0x10F4E, 220}, // '𐽎' cc=220 - {0x10F4F, 220}, // '𐽏' cc=220 - {0x10F50, 220}, // '𐽐' cc=220 - {0x10F82, 230}, // cc=230 - {0x10F83, 220}, // cc=220 - {0x10F84, 230}, // cc=230 - {0x10F85, 220}, // cc=220 - {0x11046, 9}, // '𑁆' cc=9 - {0x11070, 9}, // cc=9 - {0x1107F, 9}, // '𑁿' cc=9 - {0x110B9, 9}, // '𑂹' cc=9 - {0x110BA, 7}, // '𑂺' cc=7 - {0x11100, 230}, // '𑄀' cc=230 - {0x11101, 230}, // '𑄁' cc=230 - {0x11102, 230}, // '𑄂' cc=230 - {0x11133, 9}, // '𑄳' cc=9 - {0x11134, 9}, // '𑄴' cc=9 - {0x11173, 7}, // '𑅳' cc=7 - {0x111C0, 9}, // '𑇀' cc=9 - {0x111CA, 7}, // '𑇊' cc=7 - {0x11235, 9}, // '𑈵' cc=9 - {0x11236, 7}, // '𑈶' cc=7 - {0x112E9, 7}, // '𑋩' cc=7 - {0x112EA, 9}, // '𑋪' cc=9 - {0x1133B, 7}, // '𑌻' cc=7 - {0x1133C, 7}, // '𑌼' cc=7 - {0x1134D, 9}, // '𑍍' cc=9 - {0x11366, 230}, // '𑍦' cc=230 - {0x11367, 230}, // '𑍧' cc=230 - {0x11368, 230}, // '𑍨' cc=230 - {0x11369, 230}, // '𑍩' cc=230 - {0x1136A, 230}, // '𑍪' cc=230 - {0x1136B, 230}, // '𑍫' cc=230 - {0x1136C, 230}, // '𑍬' cc=230 - {0x11370, 230}, // '𑍰' cc=230 - {0x11371, 230}, // '𑍱' cc=230 - {0x11372, 230}, // '𑍲' cc=230 - {0x11373, 230}, // '𑍳' cc=230 - {0x11374, 230}, // '𑍴' cc=230 - {0x113CE, 9}, // cc=9 - {0x113CF, 9}, // cc=9 - {0x113D0, 9}, // cc=9 - {0x11442, 9}, // '𑑂' cc=9 - {0x11446, 7}, // '𑑆' cc=7 - {0x1145E, 230}, // '𑑞' cc=230 - {0x114C2, 9}, // '𑓂' cc=9 - {0x114C3, 7}, // '𑓃' cc=7 - {0x115BF, 9}, // '𑖿' cc=9 - {0x115C0, 7}, // '𑗀' cc=7 - {0x1163F, 9}, // '𑘿' cc=9 - {0x116B6, 9}, // '𑚶' cc=9 - {0x116B7, 7}, // '𑚷' cc=7 - {0x1172B, 9}, // '𑜫' cc=9 - {0x11839, 9}, // '𑠹' cc=9 - {0x1183A, 7}, // '𑠺' cc=7 - {0x1193D, 9}, // '𑤽' cc=9 - {0x1193E, 9}, // '𑤾' cc=9 - {0x11943, 7}, // '𑥃' cc=7 - {0x119E0, 9}, // '𑧠' cc=9 - {0x11A34, 9}, // '𑨴' cc=9 - {0x11A47, 9}, // '𑩇' cc=9 - {0x11A99, 9}, // '𑪙' cc=9 - {0x11C3F, 9}, // '𑰿' cc=9 - {0x11D42, 7}, // '𑵂' cc=7 - {0x11D44, 9}, // '𑵄' cc=9 - {0x11D45, 9}, // '𑵅' cc=9 - {0x11D97, 9}, // '𑶗' cc=9 - {0x11F41, 9}, // cc=9 - {0x11F42, 9}, // cc=9 - {0x1612F, 9}, // cc=9 - {0x16AF0, 1}, // '𖫰' cc=1 - {0x16AF1, 1}, // '𖫱' cc=1 - {0x16AF2, 1}, // '𖫲' cc=1 - {0x16AF3, 1}, // '𖫳' cc=1 - {0x16AF4, 1}, // '𖫴' cc=1 - {0x16B30, 230}, // '𖬰' cc=230 - {0x16B31, 230}, // '𖬱' cc=230 - {0x16B32, 230}, // '𖬲' cc=230 - {0x16B33, 230}, // '𖬳' cc=230 - {0x16B34, 230}, // '𖬴' cc=230 - {0x16B35, 230}, // '𖬵' cc=230 - {0x16B36, 230}, // '𖬶' cc=230 - {0x16FF0, 6}, // '𖿰' cc=6 - {0x16FF1, 6}, // '𖿱' cc=6 - {0x1BC9E, 1}, // '𛲞' cc=1 - {0x1D165, 216}, // '𝅥' cc=216 - {0x1D166, 216}, // '𝅦' cc=216 - {0x1D167, 1}, // '𝅧' cc=1 - {0x1D168, 1}, // '𝅨' cc=1 - {0x1D169, 1}, // '𝅩' cc=1 - {0x1D16D, 226}, // '𝅭' cc=226 - {0x1D16E, 216}, // '𝅮' cc=216 - {0x1D16F, 216}, // '𝅯' cc=216 - {0x1D170, 216}, // '𝅰' cc=216 - {0x1D171, 216}, // '𝅱' cc=216 - {0x1D172, 216}, // '𝅲' cc=216 - {0x1D17B, 220}, // '𝅻' cc=220 - {0x1D17C, 220}, // '𝅼' cc=220 - {0x1D17D, 220}, // '𝅽' cc=220 - {0x1D17E, 220}, // '𝅾' cc=220 - {0x1D17F, 220}, // '𝅿' cc=220 - {0x1D180, 220}, // '𝆀' cc=220 - {0x1D181, 220}, // '𝆁' cc=220 - {0x1D182, 220}, // '𝆂' cc=220 - {0x1D185, 230}, // '𝆅' cc=230 - {0x1D186, 230}, // '𝆆' cc=230 - {0x1D187, 230}, // '𝆇' cc=230 - {0x1D188, 230}, // '𝆈' cc=230 - {0x1D189, 230}, // '𝆉' cc=230 - {0x1D18A, 220}, // '𝆊' cc=220 - {0x1D18B, 220}, // '𝆋' cc=220 - {0x1D1AA, 230}, // '𝆪' cc=230 - {0x1D1AB, 230}, // '𝆫' cc=230 - {0x1D1AC, 230}, // '𝆬' cc=230 - {0x1D1AD, 230}, // '𝆭' cc=230 - {0x1D242, 230}, // '𝉂' cc=230 - {0x1D243, 230}, // '𝉃' cc=230 - {0x1D244, 230}, // '𝉄' cc=230 - {0x1E000, 230}, // '𞀀' cc=230 - {0x1E001, 230}, // '𞀁' cc=230 - {0x1E002, 230}, // '𞀂' cc=230 - {0x1E003, 230}, // '𞀃' cc=230 - {0x1E004, 230}, // '𞀄' cc=230 - {0x1E005, 230}, // '𞀅' cc=230 - {0x1E006, 230}, // '𞀆' cc=230 - {0x1E008, 230}, // '𞀈' cc=230 - {0x1E009, 230}, // '𞀉' cc=230 - {0x1E00A, 230}, // '𞀊' cc=230 - {0x1E00B, 230}, // '𞀋' cc=230 - {0x1E00C, 230}, // '𞀌' cc=230 - {0x1E00D, 230}, // '𞀍' cc=230 - {0x1E00E, 230}, // '𞀎' cc=230 - {0x1E00F, 230}, // '𞀏' cc=230 - {0x1E010, 230}, // '𞀐' cc=230 - {0x1E011, 230}, // '𞀑' cc=230 - {0x1E012, 230}, // '𞀒' cc=230 - {0x1E013, 230}, // '𞀓' cc=230 - {0x1E014, 230}, // '𞀔' cc=230 - {0x1E015, 230}, // '𞀕' cc=230 - {0x1E016, 230}, // '𞀖' cc=230 - {0x1E017, 230}, // '𞀗' cc=230 - {0x1E018, 230}, // '𞀘' cc=230 - {0x1E01B, 230}, // '𞀛' cc=230 - {0x1E01C, 230}, // '𞀜' cc=230 - {0x1E01D, 230}, // '𞀝' cc=230 - {0x1E01E, 230}, // '𞀞' cc=230 - {0x1E01F, 230}, // '𞀟' cc=230 - {0x1E020, 230}, // '𞀠' cc=230 - {0x1E021, 230}, // '𞀡' cc=230 - {0x1E023, 230}, // '𞀣' cc=230 - {0x1E024, 230}, // '𞀤' cc=230 - {0x1E026, 230}, // '𞀦' cc=230 - {0x1E027, 230}, // '𞀧' cc=230 - {0x1E028, 230}, // '𞀨' cc=230 - {0x1E029, 230}, // '𞀩' cc=230 - {0x1E02A, 230}, // '𞀪' cc=230 - {0x1E08F, 230}, // cc=230 - {0x1E130, 230}, // '𞄰' cc=230 - {0x1E131, 230}, // '𞄱' cc=230 - {0x1E132, 230}, // '𞄲' cc=230 - {0x1E133, 230}, // '𞄳' cc=230 - {0x1E134, 230}, // '𞄴' cc=230 - {0x1E135, 230}, // '𞄵' cc=230 - {0x1E136, 230}, // '𞄶' cc=230 - {0x1E2AE, 230}, // cc=230 - {0x1E2EC, 230}, // '𞋬' cc=230 - {0x1E2ED, 230}, // '𞋭' cc=230 - {0x1E2EE, 230}, // '𞋮' cc=230 - {0x1E2EF, 230}, // '𞋯' cc=230 - {0x1E4EC, 232}, // cc=232 - {0x1E4ED, 232}, // cc=232 - {0x1E4EE, 220}, // cc=220 - {0x1E4EF, 230}, // cc=230 - {0x1E5EE, 230}, // cc=230 - {0x1E5EF, 220}, // cc=220 - {0x1E6E3, 230}, // cc=230 - {0x1E6E6, 230}, // cc=230 - {0x1E6EE, 230}, // cc=230 - {0x1E6EF, 230}, // cc=230 - {0x1E6F5, 230}, // cc=230 - {0x1E8D0, 220}, // '𞣐' cc=220 - {0x1E8D1, 220}, // '𞣑' cc=220 - {0x1E8D2, 220}, // '𞣒' cc=220 - {0x1E8D3, 220}, // '𞣓' cc=220 - {0x1E8D4, 220}, // '𞣔' cc=220 - {0x1E8D5, 220}, // '𞣕' cc=220 - {0x1E8D6, 220}, // '𞣖' cc=220 - {0x1E944, 230}, // '𞥄' cc=230 - {0x1E945, 230}, // '𞥅' cc=230 - {0x1E946, 230}, // '𞥆' cc=230 - {0x1E947, 230}, // '𞥇' cc=230 - {0x1E948, 230}, // '𞥈' cc=230 - {0x1E949, 230}, // '𞥉' cc=230 - {0x1E94A, 7}, // '𞥊' cc=7 + {0x101FD, 220}, // '𐇽' cc=220 + {0x102E0, 220}, // '𐋠' cc=220 + {0x10376, 230}, // '𐍶' cc=230 + {0x10377, 230}, // '𐍷' cc=230 + {0x10378, 230}, // '𐍸' cc=230 + {0x10379, 230}, // '𐍹' cc=230 + {0x1037A, 230}, // '𐍺' cc=230 + {0x10A0D, 220}, // '𐨍' cc=220 + {0x10A0F, 230}, // '𐨏' cc=230 + {0x10A38, 230}, // '𐨸' cc=230 + {0x10A39, 1}, // '𐨹' cc=1 + {0x10A3A, 220}, // '𐨺' cc=220 + {0x10A3F, 9}, // '𐨿' cc=9 + {0x10AE5, 230}, // '𐫥' cc=230 + {0x10AE6, 220}, // '𐫦' cc=220 + {0x10D24, 230}, // '𐴤' cc=230 + {0x10D25, 230}, // '𐴥' cc=230 + {0x10D26, 230}, // '𐴦' cc=230 + {0x10D27, 230}, // '𐴧' cc=230 + {0x10D69, 230}, // cc=230 + {0x10D6A, 230}, // cc=230 + {0x10D6B, 230}, // cc=230 + {0x10D6C, 230}, // cc=230 + {0x10D6D, 230}, // cc=230 + {0x10EAB, 230}, // '𐺫' cc=230 + {0x10EAC, 230}, // '𐺬' cc=230 + {0x10EFA, 220}, // cc=220 + {0x10EFB, 220}, // cc=220 + {0x10EFD, 220}, // cc=220 + {0x10EFE, 220}, // cc=220 + {0x10EFF, 220}, // cc=220 + {0x10F46, 220}, // '𐽆' cc=220 + {0x10F47, 220}, // '𐽇' cc=220 + {0x10F48, 230}, // '𐽈' cc=230 + {0x10F49, 230}, // '𐽉' cc=230 + {0x10F4A, 230}, // '𐽊' cc=230 + {0x10F4B, 220}, // '𐽋' cc=220 + {0x10F4C, 230}, // '𐽌' cc=230 + {0x10F4D, 220}, // '𐽍' cc=220 + {0x10F4E, 220}, // '𐽎' cc=220 + {0x10F4F, 220}, // '𐽏' cc=220 + {0x10F50, 220}, // '𐽐' cc=220 + {0x10F82, 230}, // cc=230 + {0x10F83, 220}, // cc=220 + {0x10F84, 230}, // cc=230 + {0x10F85, 220}, // cc=220 + {0x11046, 9}, // '𑁆' cc=9 + {0x11070, 9}, // cc=9 + {0x1107F, 9}, // '𑁿' cc=9 + {0x110B9, 9}, // '𑂹' cc=9 + {0x110BA, 7}, // '𑂺' cc=7 + {0x11100, 230}, // '𑄀' cc=230 + {0x11101, 230}, // '𑄁' cc=230 + {0x11102, 230}, // '𑄂' cc=230 + {0x11133, 9}, // '𑄳' cc=9 + {0x11134, 9}, // '𑄴' cc=9 + {0x11173, 7}, // '𑅳' cc=7 + {0x111C0, 9}, // '𑇀' cc=9 + {0x111CA, 7}, // '𑇊' cc=7 + {0x11235, 9}, // '𑈵' cc=9 + {0x11236, 7}, // '𑈶' cc=7 + {0x112E9, 7}, // '𑋩' cc=7 + {0x112EA, 9}, // '𑋪' cc=9 + {0x1133B, 7}, // '𑌻' cc=7 + {0x1133C, 7}, // '𑌼' cc=7 + {0x1134D, 9}, // '𑍍' cc=9 + {0x11366, 230}, // '𑍦' cc=230 + {0x11367, 230}, // '𑍧' cc=230 + {0x11368, 230}, // '𑍨' cc=230 + {0x11369, 230}, // '𑍩' cc=230 + {0x1136A, 230}, // '𑍪' cc=230 + {0x1136B, 230}, // '𑍫' cc=230 + {0x1136C, 230}, // '𑍬' cc=230 + {0x11370, 230}, // '𑍰' cc=230 + {0x11371, 230}, // '𑍱' cc=230 + {0x11372, 230}, // '𑍲' cc=230 + {0x11373, 230}, // '𑍳' cc=230 + {0x11374, 230}, // '𑍴' cc=230 + {0x113CE, 9}, // cc=9 + {0x113CF, 9}, // cc=9 + {0x113D0, 9}, // cc=9 + {0x11442, 9}, // '𑑂' cc=9 + {0x11446, 7}, // '𑑆' cc=7 + {0x1145E, 230}, // '𑑞' cc=230 + {0x114C2, 9}, // '𑓂' cc=9 + {0x114C3, 7}, // '𑓃' cc=7 + {0x115BF, 9}, // '𑖿' cc=9 + {0x115C0, 7}, // '𑗀' cc=7 + {0x1163F, 9}, // '𑘿' cc=9 + {0x116B6, 9}, // '𑚶' cc=9 + {0x116B7, 7}, // '𑚷' cc=7 + {0x1172B, 9}, // '𑜫' cc=9 + {0x11839, 9}, // '𑠹' cc=9 + {0x1183A, 7}, // '𑠺' cc=7 + {0x1193D, 9}, // '𑤽' cc=9 + {0x1193E, 9}, // '𑤾' cc=9 + {0x11943, 7}, // '𑥃' cc=7 + {0x119E0, 9}, // '𑧠' cc=9 + {0x11A34, 9}, // '𑨴' cc=9 + {0x11A47, 9}, // '𑩇' cc=9 + {0x11A99, 9}, // '𑪙' cc=9 + {0x11C3F, 9}, // '𑰿' cc=9 + {0x11D42, 7}, // '𑵂' cc=7 + {0x11D44, 9}, // '𑵄' cc=9 + {0x11D45, 9}, // '𑵅' cc=9 + {0x11D97, 9}, // '𑶗' cc=9 + {0x11F41, 9}, // cc=9 + {0x11F42, 9}, // cc=9 + {0x1612F, 9}, // cc=9 + {0x16AF0, 1}, // '𖫰' cc=1 + {0x16AF1, 1}, // '𖫱' cc=1 + {0x16AF2, 1}, // '𖫲' cc=1 + {0x16AF3, 1}, // '𖫳' cc=1 + {0x16AF4, 1}, // '𖫴' cc=1 + {0x16B30, 230}, // '𖬰' cc=230 + {0x16B31, 230}, // '𖬱' cc=230 + {0x16B32, 230}, // '𖬲' cc=230 + {0x16B33, 230}, // '𖬳' cc=230 + {0x16B34, 230}, // '𖬴' cc=230 + {0x16B35, 230}, // '𖬵' cc=230 + {0x16B36, 230}, // '𖬶' cc=230 + {0x16FF0, 6}, // '𖿰' cc=6 + {0x16FF1, 6}, // '𖿱' cc=6 + {0x1BC9E, 1}, // '𛲞' cc=1 + {0x1D165, 216}, // '𝅥' cc=216 + {0x1D166, 216}, // '𝅦' cc=216 + {0x1D167, 1}, // '𝅧' cc=1 + {0x1D168, 1}, // '𝅨' cc=1 + {0x1D169, 1}, // '𝅩' cc=1 + {0x1D16D, 226}, // '𝅭' cc=226 + {0x1D16E, 216}, // '𝅮' cc=216 + {0x1D16F, 216}, // '𝅯' cc=216 + {0x1D170, 216}, // '𝅰' cc=216 + {0x1D171, 216}, // '𝅱' cc=216 + {0x1D172, 216}, // '𝅲' cc=216 + {0x1D17B, 220}, // '𝅻' cc=220 + {0x1D17C, 220}, // '𝅼' cc=220 + {0x1D17D, 220}, // '𝅽' cc=220 + {0x1D17E, 220}, // '𝅾' cc=220 + {0x1D17F, 220}, // '𝅿' cc=220 + {0x1D180, 220}, // '𝆀' cc=220 + {0x1D181, 220}, // '𝆁' cc=220 + {0x1D182, 220}, // '𝆂' cc=220 + {0x1D185, 230}, // '𝆅' cc=230 + {0x1D186, 230}, // '𝆆' cc=230 + {0x1D187, 230}, // '𝆇' cc=230 + {0x1D188, 230}, // '𝆈' cc=230 + {0x1D189, 230}, // '𝆉' cc=230 + {0x1D18A, 220}, // '𝆊' cc=220 + {0x1D18B, 220}, // '𝆋' cc=220 + {0x1D1AA, 230}, // '𝆪' cc=230 + {0x1D1AB, 230}, // '𝆫' cc=230 + {0x1D1AC, 230}, // '𝆬' cc=230 + {0x1D1AD, 230}, // '𝆭' cc=230 + {0x1D242, 230}, // '𝉂' cc=230 + {0x1D243, 230}, // '𝉃' cc=230 + {0x1D244, 230}, // '𝉄' cc=230 + {0x1E000, 230}, // '𞀀' cc=230 + {0x1E001, 230}, // '𞀁' cc=230 + {0x1E002, 230}, // '𞀂' cc=230 + {0x1E003, 230}, // '𞀃' cc=230 + {0x1E004, 230}, // '𞀄' cc=230 + {0x1E005, 230}, // '𞀅' cc=230 + {0x1E006, 230}, // '𞀆' cc=230 + {0x1E008, 230}, // '𞀈' cc=230 + {0x1E009, 230}, // '𞀉' cc=230 + {0x1E00A, 230}, // '𞀊' cc=230 + {0x1E00B, 230}, // '𞀋' cc=230 + {0x1E00C, 230}, // '𞀌' cc=230 + {0x1E00D, 230}, // '𞀍' cc=230 + {0x1E00E, 230}, // '𞀎' cc=230 + {0x1E00F, 230}, // '𞀏' cc=230 + {0x1E010, 230}, // '𞀐' cc=230 + {0x1E011, 230}, // '𞀑' cc=230 + {0x1E012, 230}, // '𞀒' cc=230 + {0x1E013, 230}, // '𞀓' cc=230 + {0x1E014, 230}, // '𞀔' cc=230 + {0x1E015, 230}, // '𞀕' cc=230 + {0x1E016, 230}, // '𞀖' cc=230 + {0x1E017, 230}, // '𞀗' cc=230 + {0x1E018, 230}, // '𞀘' cc=230 + {0x1E01B, 230}, // '𞀛' cc=230 + {0x1E01C, 230}, // '𞀜' cc=230 + {0x1E01D, 230}, // '𞀝' cc=230 + {0x1E01E, 230}, // '𞀞' cc=230 + {0x1E01F, 230}, // '𞀟' cc=230 + {0x1E020, 230}, // '𞀠' cc=230 + {0x1E021, 230}, // '𞀡' cc=230 + {0x1E023, 230}, // '𞀣' cc=230 + {0x1E024, 230}, // '𞀤' cc=230 + {0x1E026, 230}, // '𞀦' cc=230 + {0x1E027, 230}, // '𞀧' cc=230 + {0x1E028, 230}, // '𞀨' cc=230 + {0x1E029, 230}, // '𞀩' cc=230 + {0x1E02A, 230}, // '𞀪' cc=230 + {0x1E08F, 230}, // cc=230 + {0x1E130, 230}, // '𞄰' cc=230 + {0x1E131, 230}, // '𞄱' cc=230 + {0x1E132, 230}, // '𞄲' cc=230 + {0x1E133, 230}, // '𞄳' cc=230 + {0x1E134, 230}, // '𞄴' cc=230 + {0x1E135, 230}, // '𞄵' cc=230 + {0x1E136, 230}, // '𞄶' cc=230 + {0x1E2AE, 230}, // cc=230 + {0x1E2EC, 230}, // '𞋬' cc=230 + {0x1E2ED, 230}, // '𞋭' cc=230 + {0x1E2EE, 230}, // '𞋮' cc=230 + {0x1E2EF, 230}, // '𞋯' cc=230 + {0x1E4EC, 232}, // cc=232 + {0x1E4ED, 232}, // cc=232 + {0x1E4EE, 220}, // cc=220 + {0x1E4EF, 230}, // cc=230 + {0x1E5EE, 230}, // cc=230 + {0x1E5EF, 220}, // cc=220 + {0x1E6E3, 230}, // cc=230 + {0x1E6E6, 230}, // cc=230 + {0x1E6EE, 230}, // cc=230 + {0x1E6EF, 230}, // cc=230 + {0x1E6F5, 230}, // cc=230 + {0x1E8D0, 220}, // '𞣐' cc=220 + {0x1E8D1, 220}, // '𞣑' cc=220 + {0x1E8D2, 220}, // '𞣒' cc=220 + {0x1E8D3, 220}, // '𞣓' cc=220 + {0x1E8D4, 220}, // '𞣔' cc=220 + {0x1E8D5, 220}, // '𞣕' cc=220 + {0x1E8D6, 220}, // '𞣖' cc=220 + {0x1E944, 230}, // '𞥄' cc=230 + {0x1E945, 230}, // '𞥅' cc=230 + {0x1E946, 230}, // '𞥆' cc=230 + {0x1E947, 230}, // '𞥇' cc=230 + {0x1E948, 230}, // '𞥈' cc=230 + {0x1E949, 230}, // '𞥉' cc=230 + {0x1E94A, 7}, // '𞥊' cc=7 }; } // namespace tokenizers diff --git a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/unicode-nfc.h b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/unicode-nfc.h index f22398283a..57a394f6c7 100644 --- a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/unicode-nfc.h +++ b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/unicode-nfc.h @@ -18,7 +18,7 @@ namespace tokenizers { * @param cpts Input codepoint sequence * @return NFD-normalized codepoint sequence */ -std::vector unicode_normalize_nfd(const std::vector &cpts); +std::vector unicode_normalize_nfd(const std::vector& cpts); /** * Normalize a sequence of codepoints to NFC (Canonical Decomposition + @@ -27,7 +27,7 @@ std::vector unicode_normalize_nfd(const std::vector &cpts); * @param cpts Input codepoint sequence * @return NFC-normalized codepoint sequence */ -std::vector unicode_normalize_nfc(const std::vector &cpts); +std::vector unicode_normalize_nfc(const std::vector& cpts); /** * Normalize a UTF-8 string to NFD @@ -35,7 +35,7 @@ std::vector unicode_normalize_nfc(const std::vector &cpts); * @param utf8 Input UTF-8 string * @return NFD-normalized UTF-8 string */ -std::string unicode_normalize_nfd_utf8(const std::string &utf8); +std::string unicode_normalize_nfd_utf8(const std::string& utf8); /** * Normalize a UTF-8 string to NFC @@ -43,6 +43,6 @@ std::string unicode_normalize_nfd_utf8(const std::string &utf8); * @param utf8 Input UTF-8 string * @return NFC-normalized UTF-8 string */ -std::string unicode_normalize_nfc_utf8(const std::string &utf8); +std::string unicode_normalize_nfc_utf8(const std::string& utf8); } // namespace tokenizers diff --git a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/unicode_utils.h b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/unicode_utils.h new file mode 100644 index 0000000000..883c81220a --- /dev/null +++ b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/unicode_utils.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ +// @lint-ignore-every LICENSELINT + +#pragma once + +#include +#include + +namespace tokenizers { +namespace unicode_utils { + +/// CJK Unified Ideographs and extensions. +/// Shared by BertPreTokenizer, BertNormalizer, and UnicodeScriptsPreTokenizer. +inline bool is_cjk(uint32_t c) { + return (c >= 0x4E00 && c <= 0x9FFF) || (c >= 0x3400 && c <= 0x4DBF) || + (c >= 0x20000 && c <= 0x2A6DF) || (c >= 0x2A700 && c <= 0x2B73F) || + (c >= 0x2B740 && c <= 0x2B81F) || (c >= 0x2B920 && c <= 0x2CEAF) || + (c >= 0xF900 && c <= 0xFAFF) || (c >= 0x2F800 && c <= 0x2FA1F); +} + +/// ASCII + Unicode punctuation check. +/// Shared by BertPreTokenizer and PunctuationPreTokenizer. +inline bool is_punctuation(uint32_t cp) { + if ((cp >= 33 && cp <= 47) || (cp >= 58 && cp <= 64) || + (cp >= 91 && cp <= 96) || (cp >= 123 && cp <= 126)) { + return true; + } + return unicode_cpt_flags(cp).is_punctuation; +} + +} // namespace unicode_utils +} // namespace tokenizers diff --git a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/unigram_model.h b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/unigram_model.h new file mode 100644 index 0000000000..c3cdc3c04a --- /dev/null +++ b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/unigram_model.h @@ -0,0 +1,124 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ +// @lint-ignore-every LICENSELINT + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace tokenizers { + +/** + * Unigram (SentencePiece) model. + * + * Each vocabulary token has a log-probability score. Tokenization finds the + * segmentation that maximizes the sum of scores via the Viterbi algorithm. + * + * JSON format in tokenizer.json: + * "model": { + * "type": "Unigram", + * "unk_id": 0, + * "vocab": [["", 0.0], ["▁hello", -5.2], ...], + * "byte_fallback": true + * } + */ +class UnigramModel : public Model { + public: + explicit UnigramModel( + std::vector pieces, + std::vector scores, + detail::TokenMap special_token_map, + std::unique_ptr special_token_regex, + bool byte_fallback, + std::optional unk_token_id, + std::optional bos_token_id, + std::optional eos_token_id, + std::unordered_set rstrip_tokens = {}, + std::unordered_set lstrip_tokens = {}); + + ~UnigramModel() override = default; + + Result> tokenize( + const std::string& piece) const override; + + Result id_to_piece(uint64_t token) const override; + Result piece_to_id(const std::string& piece) const override; + + int32_t vocab_size() const override { + return vocab_size_; + } + + bool is_special_token(uint64_t token) const override; + + bool is_loaded() const override { + return initialized_; + } + + std::pair, std::string> + split_with_allowed_special_token(const std::string& input, size_t offset) + const override; + + bool special_token_has_rstrip(const std::string& token) const override { + return rstrip_tokens_.count(token) > 0; + } + bool special_token_has_lstrip(const std::string& token) const override { + return lstrip_tokens_.count(token) > 0; + } + + uint64_t bos_token_id() const override { + return bos_token_id_.value_or(0); + } + + uint64_t eos_token_id() const override { + return eos_token_id_.value_or(0); + } + + private: + // --- Trie for efficient prefix matching of vocabulary tokens --- + struct TrieNode { + std::unordered_map children; + int64_t token_id = -1; // -1 = no token ends at this node + }; + + void build_trie(); + + // Vocabulary: indexed by token id. + std::vector pieces_; + std::vector scores_; + std::unordered_map piece_to_id_map_; + + // Byte trie for prefix matching during Viterbi. + std::vector trie_; + + // Special-token handling (same pattern as BPEModel / WordPieceModel). + detail::TokenMap special_token_map_; + std::unique_ptr special_token_regex_; + + bool byte_fallback_ = false; + std::optional unk_token_id_; + std::optional bos_token_id_; + std::optional eos_token_id_; + std::unordered_set rstrip_tokens_; + std::unordered_set lstrip_tokens_; + + double min_score_ = 0.0; + int32_t vocab_size_ = 0; + bool initialized_ = false; +}; + +} // namespace tokenizers diff --git a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/wordlevel_model.h b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/wordlevel_model.h new file mode 100644 index 0000000000..029da642d3 --- /dev/null +++ b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/wordlevel_model.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ +// @lint-ignore-every LICENSELINT + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace tokenizers { + +/** + * WordLevel model — the simplest tokenization model. + * + * Maps whole words directly to token IDs via vocabulary lookup. + * Unknown words map to unk_token_id. + * + * JSON format in tokenizer.json: + * "model": { + * "type": "WordLevel", + * "vocab": {"hello": 0, "world": 1, ...}, + * "unk_token": "[UNK]" + * } + */ +class WordLevelModel : public Model { + public: + explicit WordLevelModel( + detail::TokenMap token_map, + detail::TokenMap special_token_map, + std::unique_ptr special_token_regex, + std::optional unk_token_id, + std::optional bos_token_id, + std::optional eos_token_id, + std::unordered_set rstrip_tokens = {}, + std::unordered_set lstrip_tokens = {}); + + ~WordLevelModel() override = default; + + Result> tokenize( + const std::string& piece) const override; + + Result id_to_piece(uint64_t token) const override; + Result piece_to_id(const std::string& piece) const override; + + int32_t vocab_size() const override { + return vocab_size_; + } + + bool is_special_token(uint64_t token) const override; + + bool is_loaded() const override { + return initialized_; + } + + std::pair, std::string> + split_with_allowed_special_token(const std::string& input, size_t offset) + const override; + + bool special_token_has_rstrip(const std::string& token) const override { + return rstrip_tokens_.count(token) > 0; + } + bool special_token_has_lstrip(const std::string& token) const override { + return lstrip_tokens_.count(token) > 0; + } + + uint64_t bos_token_id() const override { + return bos_token_id_.value_or(0); + } + + uint64_t eos_token_id() const override { + return eos_token_id_.value_or(0); + } + + private: + detail::TokenMap token_map_; + detail::TokenMap special_token_map_; + std::unique_ptr special_token_regex_; + + std::optional unk_token_id_; + std::optional bos_token_id_; + std::optional eos_token_id_; + std::unordered_set rstrip_tokens_; + std::unordered_set lstrip_tokens_; + + bool initialized_ = false; + int32_t vocab_size_ = 0; +}; + +} // namespace tokenizers diff --git a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/wordpiece_model.h b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/wordpiece_model.h index 2d7a93f9f8..2528adb933 100644 --- a/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/wordpiece_model.h +++ b/packages/react-native-executorch/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/wordpiece_model.h @@ -22,43 +22,61 @@ namespace tokenizers { class WordPieceModel : public Model { -public: - explicit WordPieceModel(detail::TokenMap token_map, - detail::TokenMap special_token_map, - std::string unk_token, - std::string continuing_subword_prefix, - size_t max_input_chars_per_word, - std::optional unk_token_id, - std::optional bos_token_id, - std::optional eos_token_id); + public: + explicit WordPieceModel( + detail::TokenMap token_map, + detail::TokenMap special_token_map, + std::string unk_token, + std::string continuing_subword_prefix, + size_t max_input_chars_per_word, + std::optional unk_token_id, + std::optional bos_token_id, + std::optional eos_token_id, + std::unordered_set rstrip_tokens = {}, + std::unordered_set lstrip_tokens = {}); ~WordPieceModel() override = default; - Result> - tokenize(const std::string &piece) const override; + Result> tokenize( + const std::string& piece) const override; Result id_to_piece(uint64_t token) const override; - Result piece_to_id(const std::string &token) const override; + Result piece_to_id(const std::string& token) const override; - int32_t vocab_size() const override { return vocab_size_; } + int32_t vocab_size() const override { + return vocab_size_; + } bool is_special_token(uint64_t token) const override; - bool is_loaded() const override { return initialized_; } + bool is_loaded() const override { + return initialized_; + } std::pair, std::string> - split_with_allowed_special_token(const std::string &input, - size_t offset) const override; + split_with_allowed_special_token(const std::string& input, size_t offset) + const override; - uint64_t bos_token_id() const override { return bos_token_id_.value_or(0); } + uint64_t bos_token_id() const override { + return bos_token_id_.value_or(0); + } - uint64_t eos_token_id() const override { return eos_token_id_.value_or(0); } + uint64_t eos_token_id() const override { + return eos_token_id_.value_or(0); + } -private: + bool special_token_has_rstrip(const std::string& token) const override { + return rstrip_tokens_.count(token) > 0; + } + bool special_token_has_lstrip(const std::string& token) const override { + return lstrip_tokens_.count(token) > 0; + } + + private: detail::TokenMap token_map_; detail::TokenMap special_token_map_; std::unique_ptr special_token_regex_; - + std::string unk_token_; std::string continuing_subword_prefix_; size_t max_input_chars_per_word_; @@ -67,6 +85,9 @@ class WordPieceModel : public Model { std::optional bos_token_id_; std::optional eos_token_id_; + std::unordered_set rstrip_tokens_; + std::unordered_set lstrip_tokens_; + bool initialized_ = false; int32_t vocab_size_ = 0; }; diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib.xcframework/Info.plist b/packages/react-native-executorch/third-party/ios/ExecutorchLib.xcframework/Info.plist index aaba93b395..6a6c556899 100644 --- a/packages/react-native-executorch/third-party/ios/ExecutorchLib.xcframework/Info.plist +++ b/packages/react-native-executorch/third-party/ios/ExecutorchLib.xcframework/Info.plist @@ -8,7 +8,7 @@ BinaryPath ExecutorchLib.framework/ExecutorchLib LibraryIdentifier - ios-arm64-simulator + ios-arm64 LibraryPath ExecutorchLib.framework SupportedArchitectures @@ -17,14 +17,12 @@ SupportedPlatform ios - SupportedPlatformVariant - simulator BinaryPath ExecutorchLib.framework/ExecutorchLib LibraryIdentifier - ios-arm64 + ios-arm64-simulator LibraryPath ExecutorchLib.framework SupportedArchitectures @@ -33,6 +31,8 @@ SupportedPlatform ios + SupportedPlatformVariant + simulator CFBundlePackageType diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/ExecutorchLib b/packages/react-native-executorch/third-party/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/ExecutorchLib index 1813eee9a7..f74ed53c6a 100755 Binary files a/packages/react-native-executorch/third-party/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/ExecutorchLib and b/packages/react-native-executorch/third-party/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/ExecutorchLib differ diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Info.plist b/packages/react-native-executorch/third-party/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Info.plist index b2b2aa2478..bd0373672c 100644 Binary files a/packages/react-native-executorch/third-party/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Info.plist and b/packages/react-native-executorch/third-party/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Info.plist differ diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/ExecutorchLib b/packages/react-native-executorch/third-party/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/ExecutorchLib index d1fe7f1fa6..61193b77ef 100755 Binary files a/packages/react-native-executorch/third-party/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/ExecutorchLib and b/packages/react-native-executorch/third-party/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/ExecutorchLib differ diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Info.plist b/packages/react-native-executorch/third-party/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Info.plist index a6f2d4a5dc..2372838d49 100644 Binary files a/packages/react-native-executorch/third-party/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Info.plist and b/packages/react-native-executorch/third-party/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Info.plist differ diff --git a/packages/react-native-executorch/third-party/ios/libs/executorch/libbackend_coreml_ios.a b/packages/react-native-executorch/third-party/ios/libs/executorch/libbackend_coreml_ios.a index a0579628b5..4e2a80a220 100644 Binary files a/packages/react-native-executorch/third-party/ios/libs/executorch/libbackend_coreml_ios.a and b/packages/react-native-executorch/third-party/ios/libs/executorch/libbackend_coreml_ios.a differ diff --git a/packages/react-native-executorch/third-party/ios/libs/executorch/libbackend_coreml_simulator.a b/packages/react-native-executorch/third-party/ios/libs/executorch/libbackend_coreml_simulator.a index 23ef86eaf3..c5b16beb50 100644 Binary files a/packages/react-native-executorch/third-party/ios/libs/executorch/libbackend_coreml_simulator.a and b/packages/react-native-executorch/third-party/ios/libs/executorch/libbackend_coreml_simulator.a differ diff --git a/packages/react-native-executorch/third-party/ios/libs/executorch/libbackend_mps_ios.a b/packages/react-native-executorch/third-party/ios/libs/executorch/libbackend_mps_ios.a index 6f846c442e..ce63bfd904 100644 Binary files a/packages/react-native-executorch/third-party/ios/libs/executorch/libbackend_mps_ios.a and b/packages/react-native-executorch/third-party/ios/libs/executorch/libbackend_mps_ios.a differ diff --git a/packages/react-native-executorch/third-party/ios/libs/executorch/libbackend_mps_simulator.a b/packages/react-native-executorch/third-party/ios/libs/executorch/libbackend_mps_simulator.a index 1991b4f6ed..3679faaa46 100644 Binary files a/packages/react-native-executorch/third-party/ios/libs/executorch/libbackend_mps_simulator.a and b/packages/react-native-executorch/third-party/ios/libs/executorch/libbackend_mps_simulator.a differ diff --git a/packages/react-native-executorch/third-party/ios/libs/executorch/libbackend_xnnpack_ios.a b/packages/react-native-executorch/third-party/ios/libs/executorch/libbackend_xnnpack_ios.a index 63b250f392..4049c0a640 100644 Binary files a/packages/react-native-executorch/third-party/ios/libs/executorch/libbackend_xnnpack_ios.a and b/packages/react-native-executorch/third-party/ios/libs/executorch/libbackend_xnnpack_ios.a differ diff --git a/packages/react-native-executorch/third-party/ios/libs/executorch/libbackend_xnnpack_simulator.a b/packages/react-native-executorch/third-party/ios/libs/executorch/libbackend_xnnpack_simulator.a index 2b482a78ac..bd145d474f 100644 Binary files a/packages/react-native-executorch/third-party/ios/libs/executorch/libbackend_xnnpack_simulator.a and b/packages/react-native-executorch/third-party/ios/libs/executorch/libbackend_xnnpack_simulator.a differ diff --git a/packages/react-native-executorch/third-party/ios/libs/executorch/libexecutorch_ios.a b/packages/react-native-executorch/third-party/ios/libs/executorch/libexecutorch_ios.a index 5897480997..221409be07 100644 Binary files a/packages/react-native-executorch/third-party/ios/libs/executorch/libexecutorch_ios.a and b/packages/react-native-executorch/third-party/ios/libs/executorch/libexecutorch_ios.a differ diff --git a/packages/react-native-executorch/third-party/ios/libs/executorch/libexecutorch_llm_ios.a b/packages/react-native-executorch/third-party/ios/libs/executorch/libexecutorch_llm_ios.a index d53d1d39af..40b74e9c7d 100644 Binary files a/packages/react-native-executorch/third-party/ios/libs/executorch/libexecutorch_llm_ios.a and b/packages/react-native-executorch/third-party/ios/libs/executorch/libexecutorch_llm_ios.a differ diff --git a/packages/react-native-executorch/third-party/ios/libs/executorch/libexecutorch_llm_simulator.a b/packages/react-native-executorch/third-party/ios/libs/executorch/libexecutorch_llm_simulator.a index 2eb08c840e..bec9105acb 100644 Binary files a/packages/react-native-executorch/third-party/ios/libs/executorch/libexecutorch_llm_simulator.a and b/packages/react-native-executorch/third-party/ios/libs/executorch/libexecutorch_llm_simulator.a differ diff --git a/packages/react-native-executorch/third-party/ios/libs/executorch/libexecutorch_simulator.a b/packages/react-native-executorch/third-party/ios/libs/executorch/libexecutorch_simulator.a index cecd8b6146..99449364f2 100644 Binary files a/packages/react-native-executorch/third-party/ios/libs/executorch/libexecutorch_simulator.a and b/packages/react-native-executorch/third-party/ios/libs/executorch/libexecutorch_simulator.a differ diff --git a/packages/react-native-executorch/third-party/ios/libs/executorch/libkernels_llm_ios.a b/packages/react-native-executorch/third-party/ios/libs/executorch/libkernels_llm_ios.a index a401de3541..5217a54902 100644 Binary files a/packages/react-native-executorch/third-party/ios/libs/executorch/libkernels_llm_ios.a and b/packages/react-native-executorch/third-party/ios/libs/executorch/libkernels_llm_ios.a differ diff --git a/packages/react-native-executorch/third-party/ios/libs/executorch/libkernels_llm_simulator.a b/packages/react-native-executorch/third-party/ios/libs/executorch/libkernels_llm_simulator.a index 2455377e31..9add6b355c 100644 Binary files a/packages/react-native-executorch/third-party/ios/libs/executorch/libkernels_llm_simulator.a and b/packages/react-native-executorch/third-party/ios/libs/executorch/libkernels_llm_simulator.a differ diff --git a/packages/react-native-executorch/third-party/ios/libs/executorch/libkernels_optimized_ios.a b/packages/react-native-executorch/third-party/ios/libs/executorch/libkernels_optimized_ios.a index a671120ee4..9cc9a42bd2 100644 Binary files a/packages/react-native-executorch/third-party/ios/libs/executorch/libkernels_optimized_ios.a and b/packages/react-native-executorch/third-party/ios/libs/executorch/libkernels_optimized_ios.a differ diff --git a/packages/react-native-executorch/third-party/ios/libs/executorch/libkernels_optimized_simulator.a b/packages/react-native-executorch/third-party/ios/libs/executorch/libkernels_optimized_simulator.a index ccc5272c20..cb06a86712 100644 Binary files a/packages/react-native-executorch/third-party/ios/libs/executorch/libkernels_optimized_simulator.a and b/packages/react-native-executorch/third-party/ios/libs/executorch/libkernels_optimized_simulator.a differ diff --git a/packages/react-native-executorch/third-party/ios/libs/executorch/libkernels_quantized_ios.a b/packages/react-native-executorch/third-party/ios/libs/executorch/libkernels_quantized_ios.a index 1bd8794acd..d54805247e 100644 Binary files a/packages/react-native-executorch/third-party/ios/libs/executorch/libkernels_quantized_ios.a and b/packages/react-native-executorch/third-party/ios/libs/executorch/libkernels_quantized_ios.a differ diff --git a/packages/react-native-executorch/third-party/ios/libs/executorch/libkernels_quantized_simulator.a b/packages/react-native-executorch/third-party/ios/libs/executorch/libkernels_quantized_simulator.a index ebe32a9d72..41f06f64fb 100644 Binary files a/packages/react-native-executorch/third-party/ios/libs/executorch/libkernels_quantized_simulator.a and b/packages/react-native-executorch/third-party/ios/libs/executorch/libkernels_quantized_simulator.a differ diff --git a/packages/react-native-executorch/third-party/ios/libs/executorch/libkernels_torchao_ios.a b/packages/react-native-executorch/third-party/ios/libs/executorch/libkernels_torchao_ios.a index c3098fdb29..19db3e80f4 100644 Binary files a/packages/react-native-executorch/third-party/ios/libs/executorch/libkernels_torchao_ios.a and b/packages/react-native-executorch/third-party/ios/libs/executorch/libkernels_torchao_ios.a differ diff --git a/packages/react-native-executorch/third-party/ios/libs/executorch/libkernels_torchao_simulator.a b/packages/react-native-executorch/third-party/ios/libs/executorch/libkernels_torchao_simulator.a index 4e909ef02b..af26c633e1 100644 Binary files a/packages/react-native-executorch/third-party/ios/libs/executorch/libkernels_torchao_simulator.a and b/packages/react-native-executorch/third-party/ios/libs/executorch/libkernels_torchao_simulator.a differ diff --git a/packages/react-native-executorch/third-party/ios/libs/executorch/libthreadpool_ios.a b/packages/react-native-executorch/third-party/ios/libs/executorch/libthreadpool_ios.a index 7ec6ea3c81..8c14cf924b 100644 Binary files a/packages/react-native-executorch/third-party/ios/libs/executorch/libthreadpool_ios.a and b/packages/react-native-executorch/third-party/ios/libs/executorch/libthreadpool_ios.a differ diff --git a/packages/react-native-executorch/third-party/ios/libs/executorch/libthreadpool_simulator.a b/packages/react-native-executorch/third-party/ios/libs/executorch/libthreadpool_simulator.a index 031e5068a7..d6deeb5a00 100644 Binary files a/packages/react-native-executorch/third-party/ios/libs/executorch/libthreadpool_simulator.a and b/packages/react-native-executorch/third-party/ios/libs/executorch/libthreadpool_simulator.a differ