From 09447e628121cd15d750b63ddd4c043a44438a96 Mon Sep 17 00:00:00 2001 From: Leonardo Parente <23251360+leoparente@users.noreply.github.com> Date: Tue, 17 Oct 2023 15:58:26 -0400 Subject: [PATCH] Add lib http client to pktvisor --- conanfile.txt | 2 + libs/CMakeLists.txt | 5 +- libs/visor_http_client/CMakeLists.txt | 37 ++ libs/visor_http_client/base64.cpp | 120 +++++ libs/visor_http_client/base64.h | 14 + libs/visor_http_client/http.h | 9 + libs/visor_http_client/httpssession.cpp | 391 ++++++++++++++ libs/visor_http_client/httpssession.h | 116 +++++ libs/visor_http_client/target.h | 11 + libs/visor_http_client/tcpsession.cpp | 100 ++++ libs/visor_http_client/tcpsession.h | 40 ++ libs/visor_http_client/test_driver.cpp | 163 ++++++ libs/visor_http_client/url_parser.c | 652 ++++++++++++++++++++++++ libs/visor_http_client/url_parser.h | 94 ++++ 14 files changed, 1753 insertions(+), 1 deletion(-) create mode 100644 libs/visor_http_client/CMakeLists.txt create mode 100644 libs/visor_http_client/base64.cpp create mode 100644 libs/visor_http_client/base64.h create mode 100644 libs/visor_http_client/http.h create mode 100644 libs/visor_http_client/httpssession.cpp create mode 100644 libs/visor_http_client/httpssession.h create mode 100644 libs/visor_http_client/target.h create mode 100644 libs/visor_http_client/tcpsession.cpp create mode 100644 libs/visor_http_client/tcpsession.h create mode 100644 libs/visor_http_client/test_driver.cpp create mode 100644 libs/visor_http_client/url_parser.c create mode 100644 libs/visor_http_client/url_parser.h diff --git a/conanfile.txt b/conanfile.txt index 190ca3b31..27936a5b8 100644 --- a/conanfile.txt +++ b/conanfile.txt @@ -3,9 +3,11 @@ catch2/3.4.0 corrade/2020.06 cpp-httplib/0.14.1 docopt.cpp/0.6.3 +gnutls/3.7.8 fast-cpp-csv-parser/cci.20211104 json-schema-validator/2.2.0 libmaxminddb/1.7.1 +libnghttp2/1.57.0 nlohmann_json/3.11.2 openssl/1.1.1w opentelemetry-proto/1.0.0 diff --git a/libs/CMakeLists.txt b/libs/CMakeLists.txt index 
e6480f56e..b8b5914a2 100644 --- a/libs/CMakeLists.txt +++ b/libs/CMakeLists.txt @@ -4,4 +4,7 @@ add_subdirectory(visor_test) add_subdirectory(visor_transaction) add_subdirectory(visor_tcp) add_subdirectory(visor_dns) -add_subdirectory(visor_utils) \ No newline at end of file +add_subdirectory(visor_utils) +if(NOT WIN32) + add_subdirectory(visor_http_client) +endif() \ No newline at end of file diff --git a/libs/visor_http_client/CMakeLists.txt b/libs/visor_http_client/CMakeLists.txt new file mode 100644 index 000000000..10097c029 --- /dev/null +++ b/libs/visor_http_client/CMakeLists.txt @@ -0,0 +1,37 @@ +message(STATUS "Visor Lib HTTP Client") + +add_library(VisorLibHttpClient + httpssession.cpp + tcpsession.cpp + url_parser.c + ) +add_library(Visor::Lib::HttpClient ALIAS VisorLibHttpClient) + + +target_include_directories(VisorLibHttpClient + PUBLIC + $ + ) + +target_link_libraries(VisorLibHttpClient + PUBLIC + ${CONAN_LIBS_LIBNGHTTP2} + ${CONAN_LIBS_LIBUV} + ${CONAN_LIBS_UVW} + + ${CONAN_LIBS_NETTLE} + ${CONAN_LIBS_GMP} + ${CONAN_LIBS_LIBICONV} + ${CONAN_LIBS_ZLIB} + ${CONAN_LIBS_ZSTD} + ${CONAN_LIBS_BROTLI} + ${CONAN_LIBS_GNUTLS} + ) + +## TEST SUITE +add_executable(unit-tests-visor-http-client test_driver.cpp) + +target_link_libraries(unit-tests-visor-http-client + PRIVATE + Visor::Lib::HttpClient + ${CONAN_LIBS_CATCH2}) \ No newline at end of file diff --git a/libs/visor_http_client/base64.cpp b/libs/visor_http_client/base64.cpp new file mode 100644 index 000000000..6eb8b1cc6 --- /dev/null +++ b/libs/visor_http_client/base64.cpp @@ -0,0 +1,120 @@ +/* + base64.cpp and base64.h + + base64 encoding and decoding with C++. + + Version: 1.01.00 + + Copyright (C) 2004-2017 René Nyffenegger + ALTERED by Jeroen Wijenbergh to allow for encoding of URLs + + This source code is provided 'as-is', without any express or implied + warranty. In no event will the author be held liable for any damages + arising from the use of this software. 
+
+   Permission is granted to anyone to use this software for any purpose,
+   including commercial applications, and to alter it and redistribute it
+   freely, subject to the following restrictions:
+
+   1. The origin of this source code must not be misrepresented; you must not
+      claim that you wrote the original source code. If you use this source code
+      in a product, an acknowledgment in the product documentation would be
+      appreciated but is not required.
+
+   2. Altered source versions must be plainly marked as such, and must not be
+      misrepresented as being the original source code.
+
+   3. This notice may not be removed or altered from any source distribution.
+
+   René Nyffenegger rene.nyffenegger@adp-gmbh.ch
+
+*/
+
+#include "base64.h"
+#include <cctype>
+#include <string>
+
+// URL-safe alphabet: '-' and '_' take the place of '+' and '/'.
+static const std::string base64_chars =
+             "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+             "abcdefghijklmnopqrstuvwxyz"
+             "0123456789-_";
+
+
+// True when c belongs to the alphabet above.
+static inline bool is_base64(unsigned char c) {
+  return (isalnum(c) || (c == '-') || (c == '_'));
+}
+
+// Encodes in_len bytes starting at bytes_to_encode with the URL-safe alphabet.
+// No '=' padding is emitted: a trailing group of 1 or 2 input bytes yields
+// 2 or 3 output characters respectively.
+std::string base64_encode(unsigned char const* bytes_to_encode, unsigned int in_len) {
+  std::string ret;
+  int i = 0;
+  int j = 0;
+  unsigned char char_array_3[3];
+  unsigned char char_array_4[4];
+
+  while (in_len--) {
+    char_array_3[i++] = *(bytes_to_encode++);
+    if (i == 3) {
+      // a full 3-byte group maps onto four 6-bit alphabet indexes
+      char_array_4[0] = (char_array_3[0] & 0xfc) >> 2;
+      char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4);
+      char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6);
+      char_array_4[3] = char_array_3[2] & 0x3f;
+
+      for(i = 0; (i <4) ; i++)
+        ret += base64_chars[char_array_4[i]];
+      i = 0;
+    }
+  }
+
+  if (i)
+  {
+    // zero-fill the partial group, then emit only the i + 1 significant characters
+    for(j = i; j < 3; j++)
+      char_array_3[j] = '\0';
+
+    char_array_4[0] = ( char_array_3[0] & 0xfc) >> 2;
+    char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4);
+    char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6);
+
+    for (j = 0; (j < i + 1); j++)
+      ret += base64_chars[char_array_4[j]];
+
+  }
+
+  return ret;
+
+}
+
+// Decodes URL-safe base64 text. Decoding stops at the first character that is
+// not in the alphabet (including '='); everything decoded up to that point is
+// returned.
+std::string base64_decode(std::string const& encoded_string) {
+  size_t in_len = encoded_string.size();
+  int i = 0;
+  int j = 0;
+  size_t in_ = 0; // same type as in_len, so the index cannot overflow before the length does
+  unsigned char char_array_4[4], char_array_3[3];
+  std::string ret;
+
+  while (in_len-- && is_base64(encoded_string[in_])) {
+    char_array_4[i++] = encoded_string[in_]; in_++;
+    if (i ==4) {
+      // translate characters to their 6-bit values
+      for (i = 0; i <4; i++)
+        char_array_4[i] = base64_chars.find(char_array_4[i]) & 0xff;
+
+      char_array_3[0] = ( char_array_4[0] << 2       ) + ((char_array_4[1] & 0x30) >> 4);
+      char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
+      char_array_3[2] = ((char_array_4[2] & 0x3) << 6) +   char_array_4[3];
+
+      for (i = 0; (i < 3); i++)
+        ret += char_array_3[i];
+      i = 0;
+    }
+  }
+
+  if (i) {
+    // partial trailing group: i characters carry i - 1 bytes
+    for (j = 0; j < i; j++)
+      char_array_4[j] = base64_chars.find(char_array_4[j]) & 0xff;
+
+    char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
+    char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
+
+    for (j = 0; (j < i - 1); j++) ret += char_array_3[j];
+  }
+
+  return ret;
+}
diff --git a/libs/visor_http_client/base64.h b/libs/visor_http_client/base64.h
new file mode 100644
index 000000000..14db18499
--- /dev/null
+++ b/libs/visor_http_client/base64.h
@@ -0,0 +1,14 @@
+//
+// base64 encoding and decoding with C++.
+// Version: 1.01.00
+//
+
+#ifndef BASE64_H_C0CE2A47_D10E_42C9_A27C_C883944E704A
+#define BASE64_H_C0CE2A47_D10E_42C9_A27C_C883944E704A
+
+#include <string>
+
+std::string base64_encode(unsigned char const* , unsigned int len);
+std::string base64_decode(std::string const& s);
+
+#endif /* BASE64_H_C0CE2A47_D10E_42C9_A27C_C883944E704A */
\ No newline at end of file
diff --git a/libs/visor_http_client/http.h b/libs/visor_http_client/http.h
new file mode 100644
index 000000000..d6caf750d
--- /dev/null
+++ b/libs/visor_http_client/http.h
@@ -0,0 +1,9 @@
+#ifndef FLAMETHROWER_HTTP_H
+#define FLAMETHROWER_HTTP_H
+
+// HTTP request method used when submitting a request.
+enum class HTTPMethod {
+    POST,
+    GET,
+};
+
+#endif //FLAMETHROWER_HTTP_H
\ No newline at end of file
diff --git a/libs/visor_http_client/httpssession.cpp b/libs/visor_http_client/httpssession.cpp
new file mode 100644
index 000000000..915a5223c
--- /dev/null
+++ b/libs/visor_http_client/httpssession.cpp
@@ -0,0 +1,391 @@
+#include <algorithm>
+#include <cstring>
+#include <iostream>
+
+#include "httpssession.h"
+
+// GnuTLS transport hooks are plain function pointers; these trampolines recover
+// the HTTPSSession registered through gnutls_transport_set_ptr() in setup().
+static ssize_t gnutls_pull_trampoline(gnutls_transport_ptr_t h, void *buf, size_t len)
+{
+    auto session = static_cast<HTTPSSession *>(h);
+    return session->gnutls_pull(buf, len);
+}
+
+static ssize_t gnutls_push_trampoline(gnutls_transport_ptr_t h, const void *buf, size_t len)
+{
+    auto session = static_cast<HTTPSSession *>(h);
+    return session->gnutls_push(buf, len);
+}
+
+// Stores the callbacks and target. No TLS or HTTP/2 state is created here:
+// the GnuTLS handles are created by setup() and the nghttp2 session by
+// init_nghttp2() once the handshake has succeeded.
+HTTPSSession::HTTPSSession(std::shared_ptr<uvw::tcp_handle> handle,
+    TCPSession::malformed_data_cb malformed_data_handler,
+    TCPSession::got_dns_msg_cb got_dns_msg_handler,
+    TCPSession::connection_ready_cb connection_ready_handler,
+    handshake_error_cb handshake_error_handler,
+    Target target,
+    HTTPMethod method)
+    : TCPSession(handle, malformed_data_handler, got_dns_msg_handler, connection_ready_handler)
+    , http2_state{STATE_HTTP2::WAIT_SETTINGS}
+    , _malformed_data{malformed_data_handler}
+    , _got_dns_msg{got_dns_msg_handler}
+    , _handle{handle}
+    , _tls_state{LinkState::HANDSHAKE}
+    , _handshake_error{handshake_error_handler}
+    , _target{std::move(target)}
+    , _method{method}
+    , _current_session{nullptr}
+{
+}
+
+HTTPSSession::~HTTPSSession()
+{
+    // destroy_session() tolerates handles that were never created, so an
+    // object whose setup() was never called (or failed midway) is safe to destroy.
+    destroy_session();
+}
+
+// Builds the per-request record (pseudo-header values plus body) from _target.
+// For GET the payload is appended verbatim to the path as the "dns" query value.
+// NOTE(review): no encoding is applied here; presumably the caller passes an
+// already base64url-encoded payload for GET - confirm against callers.
+// Returns nullptr when the target carries no parsed URL.
+std::unique_ptr<http2_stream_data> HTTPSSession::create_http2_stream_data(std::unique_ptr<char[]> data, size_t len)
+{
+    const struct http_parser_url *u = _target.parsed;
+    if (!u) {
+        return nullptr;
+    }
+    const std::string &uri = _target.uri;
+    std::string scheme(&uri[u->field_data[UF_SCHEMA].off], u->field_data[UF_SCHEMA].len);
+    std::string authority(&uri[u->field_data[UF_HOST].off], u->field_data[UF_HOST].len);
+    std::string path(&uri[u->field_data[UF_PATH].off], u->field_data[UF_PATH].len);
+    int32_t stream_id = -1; // real id is assigned by write() after submission
+    if (_method == HTTPMethod::GET) {
+        path.append("?dns=");
+        path.append(data.get(), len);
+    }
+    std::string streamData(data.get(), len);
+    return std::make_unique<http2_stream_data>(scheme, authority, path, stream_id, streamData);
+}
+#define ARRLEN(x) (sizeof(x) / sizeof(x[0]))
+
+// nghttp2 send hook: forwards serialized frames to the TLS layer.
+// Always reports the whole buffer as consumed because send_tls() does not
+// report partial writes.
+static ssize_t send_callback([[maybe_unused]] nghttp2_session *session, const uint8_t *data,
+    size_t length, [[maybe_unused]] int flags, void *user_data)
+{
+    auto class_session = static_cast<HTTPSSession *>(user_data);
+    class_session->send_tls((void *)data, length);
+    return (ssize_t)length;
+}
+
+// Releases the TLS and HTTP/2 state. Every handle is reset to nullptr after it
+// is freed, so calling this more than once (or from the destructor after an
+// explicit call) cannot double-free.
+void HTTPSSession::destroy_session()
+{
+    if (_gnutls_session) {
+        gnutls_deinit(_gnutls_session);
+        _gnutls_session = nullptr;
+    }
+    if (_gnutls_cert_credentials) {
+        gnutls_certificate_free_credentials(_gnutls_cert_credentials);
+        _gnutls_cert_credentials = nullptr;
+    }
+    nghttp2_session_del(_current_session); // documented no-op for NULL
+    _current_session = nullptr;
+    _streams.clear();
+    _recv_chunks.clear();
+}
+
+// Validates the size of a complete response body and hands a copy to the
+// got_dns_msg callback; reports out-of-range sizes through malformed_data.
+void HTTPSSession::process_receive(const uint8_t *data, size_t len)
+{
+    // dnsheader is 12, at least one byte for the minimum name,
+    // two bytes for the qtype and another two for the qclass
+    const size_t MIN_DNS_RESPONSE_SIZE = 17;
+    // 512 over UDP without EDNS, but 65535 over TCP
+    const size_t MAX_DNS_RESPONSE_SIZE = 65535;
+    if (len < MIN_DNS_RESPONSE_SIZE || len > MAX_DNS_RESPONSE_SIZE) {
+        std::cerr << "malformed data" << std::endl;
+        _malformed_data();
+        return;
+    }
+    auto buf = std::make_unique<char[]>(len);
+    memcpy(buf.get(), (const char *)data, len);
+    _got_dns_msg(std::move(buf), len);
+}
+
+// nghttp2 hook: accumulates DATA payload per stream until END_STREAM arrives.
+static int on_data_chunk_recv_callback(nghttp2_session *session, [[maybe_unused]] uint8_t flags,
+    int32_t stream_id, const uint8_t *data,
+    size_t len, void *user_data)
+{
+    auto class_session = static_cast<HTTPSSession *>(user_data);
+    auto req = nghttp2_session_get_stream_user_data(session, stream_id);
+    if (!req) {
+        std::cerr << "No stream data on data chunk" << std::endl;
+        return 0;
+    }
+    // operator[] creates the entry on the first chunk of a stream
+    auto &chunk = class_session->_recv_chunks[stream_id];
+    chunk.insert(chunk.end(), data, data + len);
+    return 0;
+}
+
+// nghttp2 hook: drops the per-stream bookkeeping, then asks nghttp2 to
+// terminate the whole session (one request per session).
+static int on_stream_close_callback(nghttp2_session *session, int32_t stream_id, [[maybe_unused]] uint32_t error_code,
+    void *user_data)
+{
+    auto stream_data = static_cast<http2_stream_data *>(nghttp2_session_get_stream_user_data(session, stream_id));
+    if (!stream_data) {
+        std::cerr << "No stream data on stream close" << std::endl;
+        return 0;
+    }
+    // stream_data is owned by the session object; this releases it
+    static_cast<HTTPSSession *>(user_data)->remove_stream(stream_data);
+    nghttp2_session_terminate_session(session, NGHTTP2_NO_ERROR);
+    return 0;
+}
+
+// nghttp2 hook: reacts to the server SETTINGS frame and to the final DATA
+// frame of a response.
+int on_frame_recv_callback([[maybe_unused]] nghttp2_session *session,
+    const nghttp2_frame *frame, void *user_data)
+{
+    auto class_session = static_cast<HTTPSSession *>(user_data);
+    switch (frame->hd.type) {
+    case NGHTTP2_SETTINGS:
+        class_session->settings_received();
+        break;
+    case NGHTTP2_DATA:
+        if (frame->hd.flags & NGHTTP2_FLAG_END_STREAM) {
+            auto it = class_session->_recv_chunks.find(frame->hd.stream_id);
+            if (it == class_session->_recv_chunks.end()) {
+                // no payload was collected: reported as malformed (size 0)
+                class_session->process_receive(nullptr, 0);
+            } else {
+                // take the buffer out of the map first so user callbacks run
+                // against a stable copy and the entry does not accumulate
+                auto payload = std::move(it->second);
+                class_session->_recv_chunks.erase(it);
+                class_session->process_receive(payload.data(), payload.size());
+            }
+        }
+        break;
+    default:
+        break;
+    }
+    return 0;
+}
+
+// Creates the nghttp2 client session with the callbacks above. On failure
+// _current_session stays nullptr; callers must check it.
+void HTTPSSession::init_nghttp2()
+{
+    nghttp2_session_callbacks *callbacks = nullptr;
+    int rv = nghttp2_session_callbacks_new(&callbacks);
+    if (rv != 0) {
+        std::cerr << "Could not allocate HTTP2 callbacks: " << nghttp2_strerror(rv) << std::endl;
+        return;
+    }
+    nghttp2_session_callbacks_set_send_callback(callbacks, send_callback);
+    nghttp2_session_callbacks_set_on_data_chunk_recv_callback(callbacks, on_data_chunk_recv_callback);
+    nghttp2_session_callbacks_set_on_stream_close_callback(callbacks, on_stream_close_callback);
+    nghttp2_session_callbacks_set_on_frame_recv_callback(callbacks, on_frame_recv_callback);
+    rv = nghttp2_session_client_new(&_current_session, callbacks, this);
+    nghttp2_session_callbacks_del(callbacks);
+    if (rv != 0) {
+        std::cerr << "Could not create HTTP2 session: " << nghttp2_strerror(rv) << std::endl;
+        _current_session = nullptr;
+    }
+}
+
+// Creates and configures the GnuTLS client session (system trust store,
+// mandatory "h2" ALPN, custom push/pull transport). Returns false on the
+// first failing step; anything already allocated is released by the destructor.
+// NOTE(review): no server name or certificate verification is configured here
+// (no gnutls_server_name_set / gnutls_session_set_verify_cert call is visible),
+// so the peer certificate does not appear to be checked - confirm this is intended.
+bool HTTPSSession::setup()
+{
+    int ret;
+
+    ret = gnutls_init(&_gnutls_session, GNUTLS_CLIENT | GNUTLS_NONBLOCK);
+    if (ret != GNUTLS_E_SUCCESS) {
+        std::cerr << "GNUTLS init failed: " << gnutls_strerror(ret) << std::endl;
+        return false;
+    }
+
+    ret = gnutls_set_default_priority(_gnutls_session);
+    if (ret != GNUTLS_E_SUCCESS) {
+        std::cerr << "GNUTLS failed to set default priority: " << gnutls_strerror(ret) << std::endl;
+        return false;
+    }
+
+    ret = gnutls_certificate_allocate_credentials(&_gnutls_cert_credentials);
+    if (ret < 0) {
+        std::cerr << "GNUTLS failed to allocate credentials: " << gnutls_strerror(ret) << std::endl;
+        return false;
+    }
+
+    ret = gnutls_certificate_set_x509_system_trust(_gnutls_cert_credentials);
+    if (ret < 0) {
+        std::cerr << "GNUTLS failed to set system trust: " << gnutls_strerror(ret) << std::endl;
+        return false;
+    }
+
+    ret = gnutls_credentials_set(_gnutls_session, GNUTLS_CRD_CERTIFICATE,
+        _gnutls_cert_credentials);
+    if (ret < 0) {
+        std::cerr << "GNUTLS failed to set system credentials" << gnutls_strerror(ret) << std::endl;
+        return false;
+    }
+
+    gnutls_datum_t alpn;
+    alpn.data = (unsigned char *)"h2";
+    alpn.size = 2;
+    ret = gnutls_alpn_set_protocols(_gnutls_session, &alpn, 1, GNUTLS_ALPN_MANDATORY);
+    if (ret != GNUTLS_E_SUCCESS) {
+        std::cerr << "GNUTLS failed to set ALPN: " << gnutls_strerror(ret) << std::endl;
+        return false;
+    }
+
+    gnutls_transport_set_pull_function(_gnutls_session, gnutls_pull_trampoline);
+    gnutls_transport_set_push_function(_gnutls_session, gnutls_push_trampoline);
+    gnutls_handshake_set_timeout(_gnutls_session, GNUTLS_DEFAULT_HANDSHAKE_TIMEOUT);
+    gnutls_transport_set_ptr(_gnutls_session, this);
+    return true;
+}
+
+// Queues the client SETTINGS frame; the frame is written by the next session_send().
+void HTTPSSession::send_settings()
+{
+    nghttp2_settings_entry settings[1] = {{NGHTTP2_SETTINGS_MAX_CONCURRENT_STREAMS, (1U << 31) - 1}};
+    int val;
+    val = nghttp2_submit_settings(_current_session, NGHTTP2_FLAG_NONE, settings, ARRLEN(settings));
+    if (val != 0) {
+        std::cerr << "Could not submit SETTINGS frame: " << nghttp2_strerror(val) << std::endl;
+    }
+}
+
+// Called for every SETTINGS frame; only the first one fires the base-class
+// connect notification (which invokes the connection-ready callback).
+void HTTPSSession::settings_received()
+{
+    if (http2_state == STATE_HTTP2::WAIT_SETTINGS) {
+        TCPSession::on_connect_event();
+        http2_state = STATE_HTTP2::SENDING_DATA;
+    }
+}
+
+// Feeds decrypted bytes to nghttp2; closes the session on a parse error.
+// NOTE(review): frames queued by nghttp2 while receiving are only written by
+// the next session_send() call elsewhere - confirm that is intended.
+void HTTPSSession::receive_response(const char data[], size_t len)
+{
+    const ssize_t consumed = nghttp2_session_mem_recv(_current_session, (const uint8_t *)data, len);
+    if (consumed < 0) {
+        // negative values are nghttp2 error codes, which fit in int
+        std::cerr << "Could not get HTTP2 request: " << nghttp2_strerror(static_cast<int>(consumed)) << std::endl;
+        close();
+        return;
+    }
+}
+
+// Writes all pending nghttp2 frames. Returns 0 on success, -1 on error.
+int HTTPSSession::session_send()
+{
+    int rv;
+    rv = nghttp2_session_send(_current_session);
+    if (rv != 0) {
+        std::cerr << "HTTP2 fatal error: " << nghttp2_strerror(rv) << std::endl;
+        return -1;
+    }
+    return 0;
+}
+
+// TCP connect completed: drop any previous HTTP/2 session and start the TLS handshake.
+void HTTPSSession::on_connect_event()
+{
+    nghttp2_session_del(_current_session); // no-op for nullptr
+    _current_session = nullptr;
+    do_handshake();
+}
+
+// Marks the link closed, sends the TLS close notification (when a TLS session
+// exists) and shuts down the TCP side.
+void HTTPSSession::close()
+{
+    _tls_state = LinkState::CLOSE;
+    if (_gnutls_session) {
+        gnutls_bye(_gnutls_session, GNUTLS_SHUT_WR);
+    }
+    TCPSession::close();
+}
+
+// nghttp2 data-provider hook for POST bodies. Copies at most `length` bytes per
+// call and consumes them from the stream record, so bodies larger than one
+// buffer are delivered across several calls; EOF is flagged only once the
+// whole body has been handed over.
+static ssize_t post_data(nghttp2_session *session, int32_t stream_id, uint8_t *buf, size_t length,
+    uint32_t *data_flags, [[maybe_unused]] nghttp2_data_source *source, [[maybe_unused]] void *user_data)
+{
+    auto stream_data = static_cast<http2_stream_data *>(nghttp2_session_get_stream_user_data(session, stream_id));
+    if (!stream_data) {
+        return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE;
+    }
+    const size_t nread = std::min(stream_data->data.size(), length);
+    memcpy(buf, stream_data->data.data(), nread);
+    stream_data->data.erase(0, nread);
+    if (stream_data->data.empty()) {
+        *data_flags |= NGHTTP2_DATA_FLAG_EOF;
+    }
+    return static_cast<ssize_t>(nread);
+}
+
+// Builds an nghttp2_nv from a string literal NAME and a std::string VALUE.
+// VALUE is evaluated twice and only its buffer pointer is stored, so it must be
+// a named object that stays alive until nghttp2_submit_request() has returned -
+// never a temporary.
+#define HDR_S(NAME, VALUE)                                                         \
+    {                                                                              \
+        (uint8_t *)NAME, (uint8_t *)VALUE.c_str(), sizeof(NAME) - 1, VALUE.size(), \
+            NGHTTP2_NV_FLAG_NONE                                                   \
+    }
+
+// Submits one request carrying `len` bytes of `data` and flushes it to the
+// peer. Errors are logged; nothing is reported back to the caller.
+void HTTPSSession::write(std::unique_ptr<char[]> data, size_t len)
+{
+    if (!_current_session) {
+        std::cerr << "HTTP2 session is not established" << std::endl;
+        return;
+    }
+    auto stream_data = create_http2_stream_data(std::move(data), len);
+    if (!stream_data) {
+        std::cerr << "Could not build HTTP request: target has no parsed URL" << std::endl;
+        return;
+    }
+    nghttp2_data_provider provider = {};
+
+    std::string method = _method == HTTPMethod::GET ? "GET" : "POST";
+    std::string content = "application/dns-message";
+    // named so the header entry below does not point into a destroyed temporary
+    const std::string content_length = std::to_string(len);
+    std::vector<nghttp2_nv> hdrs{
+        HDR_S(":method", method),
+        HDR_S(":scheme", stream_data->scheme),
+        HDR_S(":authority", stream_data->authority),
+        HDR_S(":path", stream_data->path),
+        HDR_S("accept", content)};
+    if (_method == HTTPMethod::POST) {
+        hdrs.push_back(HDR_S("content-type", content));
+        hdrs.push_back(HDR_S("content-length", content_length));
+        provider.read_callback = post_data;
+    }
+
+    const int32_t stream_id = nghttp2_submit_request(_current_session, NULL, hdrs.data(), hdrs.size(), &provider, stream_data.get());
+    if (stream_id < 0) {
+        std::cerr << "Could not submit HTTP request: " << nghttp2_strerror(stream_id) << std::endl;
+        return;
+    }
+
+    stream_data->id = stream_id;
+    // nghttp2 keeps the raw pointer as stream user data; keep the record alive
+    // until the stream closes (released in remove_stream()).
+    _streams[stream_id] = std::move(stream_data);
+
+    if (session_send() != 0) {
+        std::cerr << "HTTP2 failed to send" << std::endl;
+    }
+}
+
+// Entry point for raw bytes from the socket: buffers them for GnuTLS, then
+// either advances the handshake or drains decrypted records into nghttp2.
+void HTTPSSession::receive_data(const char data[], size_t _len)
+{
+    _pull_buffer.append(data, _len);
+    switch (_tls_state) {
+    case LinkState::HANDSHAKE:
+        do_handshake();
+        break;
+    case LinkState::DATA: {
+        char buf[16384];
+        for (;;) {
+            ssize_t len = gnutls_record_recv(_gnutls_session, buf, sizeof(buf));
+            if (len > 0) {
+                receive_response(buf, len);
+                // a callback may have closed the session; stop reading then
+                if (_tls_state != LinkState::DATA) {
+                    break;
+                }
+            } else {
+                if (len == GNUTLS_E_AGAIN) {
+                    // Check if we don't have any data left to read
+                    if (_pull_buffer.empty()) {
+                        break;
+                    }
+                    continue;
+                } else if (len == GNUTLS_E_INTERRUPTED) {
+                    continue;
+                }
+                break;
+            }
+        }
+        break;
+    }
+    case LinkState::CLOSE:
+        break;
+    }
+}
+
+// Encrypts and sends `len` bytes; failures are logged only.
+void HTTPSSession::send_tls(void *data, size_t len)
+{
+    ssize_t sent = gnutls_record_send(_gnutls_session, data, len);
+    if (sent <= 0) {
+        std::cerr << "HTTP2 failed in sending data" << std::endl;
+    }
+}
+
+// Advances the TLS handshake. On success the HTTP/2 session is created and the
+// client SETTINGS frame is sent; fatal errors are reported through the
+// handshake-error callback.
+void HTTPSSession::do_handshake()
+{
+    int err = gnutls_handshake(_gnutls_session);
+    if (err == GNUTLS_E_SUCCESS) {
+        gnutls_datum_t alpn;
+        alpn.data = (unsigned char *)"h2";
+        alpn.size = 2;
+        int ret = gnutls_alpn_get_selected_protocol(_gnutls_session, &alpn);
+        if (ret != GNUTLS_E_SUCCESS) {
+            std::cerr << "Cannot get alpn" << std::endl;
+            close();
+            return; // do not start HTTP/2 on a connection that was just closed
+        }
+        init_nghttp2();
+        if (!_current_session) {
+            close();
+            return;
+        }
+        send_settings();
+        if (session_send() != 0) {
+            std::cerr << "Cannot submit settings frame" << std::endl;
+        }
+        _tls_state = LinkState::DATA;
+    } else if (err < 0 && gnutls_error_is_fatal(err)) {
+        std::cerr << "Handshake failed: " << gnutls_strerror(err) << std::endl;
+        _handshake_error();
+    } else if (err != GNUTLS_E_AGAIN && err != GNUTLS_E_INTERRUPTED) {
+        std::cout << "Handshake " << gnutls_strerror(err) << std::endl;
+    }
+}
+
+// GnuTLS pull hook: serves bytes buffered by receive_data(); reports EAGAIN
+// through errno when nothing is buffered.
+int HTTPSSession::gnutls_pull(void *buf, size_t len)
+{
+    if (!_pull_buffer.empty()) {
+        len = std::min(len, _pull_buffer.size());
+        std::memcpy(buf, _pull_buffer.data(), len);
+        _pull_buffer.erase(0, len);
+        return len;
+    }
+    errno = EAGAIN;
+    return -1;
+}
+
+// GnuTLS push hook: copies the ciphertext and hands it to the TCP layer.
+int HTTPSSession::gnutls_push(const void *buf, size_t len)
+{
+    auto data = std::make_unique<char[]>(len);
+    memcpy(data.get(), buf, len);
+    TCPSession::write(std::move(data), len);
+    return len;
+}
+
+// Releases the record and any buffered response bytes of a finished stream.
+void HTTPSSession::remove_stream(http2_stream_data *stream_data)
+{
+    if (!stream_data) {
+        return;
+    }
+    const int32_t id = stream_data->id; // copy first: erasing frees stream_data
+    _recv_chunks.erase(id);
+    _streams.erase(id);
+}
diff --git a/libs/visor_http_client/httpssession.h b/libs/visor_http_client/httpssession.h
new file mode 100644
index 000000000..56f8f2114
--- /dev/null
+++ b/libs/visor_http_client/httpssession.h
@@ -0,0 +1,116 @@
+#pragma once
+
+#include <cstdint>
+#include <functional>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include <gnutls/gnutls.h>
+#include <nghttp2/nghttp2.h>
+
+#include "base64.h"
+#include "http.h"
+#include "target.h"
+#include "tcpsession.h"
+#include "url_parser.h"
+
+// Per-request record handed to nghttp2 as stream user data.
+struct http2_stream_data {
+    http2_stream_data(std::string _scheme, std::string _authority, std::string _path, int32_t _id, std::string _data)
+        : scheme(std::move(_scheme))
+        , authority(std::move(_authority))
+        , path(std::move(_path))
+        , id(_id)
+        , data(std::move(_data))
+    {
+    }
+
+    std::string scheme;
+    std::string authority;
+    std::string path;
+    int32_t id;       // stream id; -1 until the request has been submitted
+    std::string data; // request body; consumed as it is sent
+};
+
+enum STATE_HTTP2 {
+    WAIT_SETTINGS,
+    SENDING_DATA
+};
+
+// TCPSession specialisation that layers TLS (GnuTLS) and HTTP/2 (nghttp2) on
+// top of the TCP handle. Call setup() before connecting.
+class HTTPSSession : public TCPSession
+{
+public:
+    // NOTE(review): not used in this file; the signature was lost in transit and
+    // is reconstructed as "stream id" - confirm against callers.
+    using log_send_cb = std::function<void(int32_t id)>;
+    using handshake_error_cb = std::function<void()>;
+
+    HTTPSSession(std::shared_ptr<uvw::tcp_handle> handle,
+        TCPSession::malformed_data_cb malformed_data_handler,
+        TCPSession::got_dns_msg_cb got_dns_msg_handler,
+        TCPSession::connection_ready_cb connection_ready_handler,
+        handshake_error_cb handshake_error_handler,
+        Target target,
+        HTTPMethod method);
+
+    ~HTTPSSession() override;
+
+    // owns raw GnuTLS/nghttp2 handles: copying would free them twice
+    HTTPSSession(const HTTPSSession &) = delete;
+    HTTPSSession &operator=(const HTTPSSession &) = delete;
+
+    bool setup() override;
+
+    void on_connect_event() override;
+
+    void send_tls(void *data, size_t len);
+
+    void init_nghttp2();
+
+    void send_settings();
+
+    void receive_response(const char data[], size_t len);
+
+    int session_send();
+
+    // NOTE(review): declared but no definition is visible in this patch.
+    int session_receive();
+
+    void close() override;
+
+    void receive_data(const char data[], size_t len) override;
+
+    void write(std::unique_ptr<char[]> data, size_t len) override;
+
+    void process_receive(const uint8_t *data, size_t len);
+
+    int gnutls_pull(void *buf, size_t len);
+
+    int gnutls_push(const void *buf, size_t len);
+
+    std::unique_ptr<http2_stream_data> create_http2_stream_data(std::unique_ptr<char[]> data, size_t len);
+
+    // NOTE(review): declared but no definition is visible in this patch.
+    void add_stream(http2_stream_data *stream_data);
+
+    // Releases the record and buffered response bytes of a finished stream.
+    void remove_stream(http2_stream_data *stream_data);
+
+    void settings_received();
+
+    // Response bytes collected per stream id; public because the nghttp2
+    // callbacks are free functions.
+    std::unordered_map<int32_t, std::vector<uint8_t>> _recv_chunks;
+
+protected:
+    // NOTE(review): declared but no definition is visible in this patch.
+    void destroy_stream();
+
+    void destroy_session();
+
+    void do_handshake();
+
+private:
+    STATE_HTTP2 http2_state;
+    malformed_data_cb _malformed_data;
+    got_dns_msg_cb _got_dns_msg;
+    std::shared_ptr<uvw::tcp_handle> _handle;
+    enum class LinkState {
+        HANDSHAKE,
+        DATA,
+        CLOSE
+    } _tls_state;
+    handshake_error_cb _handshake_error;
+    Target _target;
+    HTTPMethod _method;
+
+    nghttp2_session *_current_session;
+    std::string _pull_buffer; // ciphertext received but not yet pulled by GnuTLS
+
+    // nullptr until setup() creates them, so destruction without setup() is safe
+    gnutls_session_t _gnutls_session{nullptr};
+    gnutls_certificate_credentials_t _gnutls_cert_credentials{nullptr};
+
+    // records referenced by nghttp2 as stream user data, keyed by stream id
+    std::unordered_map<int32_t, std::unique_ptr<http2_stream_data>> _streams;
+};
diff --git a/libs/visor_http_client/target.h b/libs/visor_http_client/target.h
new file mode 100644
index 000000000..50d889583
--- /dev/null
+++ b/libs/visor_http_client/target.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include <string>
+
+struct http_parser_url;
+
+// One request target. `parsed` is a raw pointer that this struct never frees;
+// the pointed-to http_parser_url must outlive every user of the Target.
+struct Target {
+    http_parser_url *parsed;
+    std::string address;
+    std::string uri;
+};
diff --git a/libs/visor_http_client/tcpsession.cpp b/libs/visor_http_client/tcpsession.cpp
new file mode 100644
index 000000000..a8e6b0a90
--- /dev/null
+++ b/libs/visor_http_client/tcpsession.cpp
@@ -0,0 +1,100 @@
+
+#include "tcpsession.h"
+
+#include <cstdint>
+#include <cstring>
+#include <string>
+
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#endif
+#include <uvw/tcp.h>
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+TCPSession::TCPSession(std::shared_ptr<uvw::tcp_handle> handle,
+    malformed_data_cb malformed_data_handler,
+    got_dns_msg_cb got_dns_msg_handler,
+    connection_ready_cb connection_ready_handler)
+    : _handle{std::move(handle)}
+    , _malformed_data{std::move(malformed_data_handler)}
+    , _got_dns_msg{std::move(got_dns_msg_handler)}
+    , _connection_ready{std::move(connection_ready_handler)}
+{
+}
+
+// do any pre-connection setup, return true if all OK.
+bool TCPSession::setup()
+{
+    return true;
+}
+
+// connection established: notify the owner through the connection-ready callback
+void TCPSession::on_connect_event()
+{
+    _connection_ready();
+}
+
+// remote peer closed connection
+void TCPSession::on_end_event()
+{
+    _handle->close();
+}
+
+// all local writes now finished
+void TCPSession::on_shutdown_event()
+{
+    _handle->close();
+}
+
+// gracefully terminate the session
+void TCPSession::close()
+{
+    _handle->stop();
+    _handle->shutdown();
+}
+
+// accumulate data and try to extract DNS messages
+// (each message is preceded by a 2-byte big-endian length)
+void TCPSession::receive_data(const char data[], size_t len)
+{
+    // dnsheader is 12, at least one byte for the minimum name,
+    // two bytes for the qtype and another two for the qclass
+    const size_t MIN_DNS_RESPONSE_SIZE = 17;
+
+    _buffer.append(data, len);
+
+    for (;;) {
+        std::uint16_t size;
+
+        if (_buffer.size() < sizeof(size))
+            break;
+
+        // size is in network byte order.
+        // Go through unsigned char so bytes >= 0x80 are not sign-extended.
+        size = static_cast<unsigned char>(_buffer[1]) | static_cast<unsigned char>(_buffer[0]) << 8;
+
+        // no need to check the maximum size here since the maximum size
+        // that a std::uint16_t can hold, std::numeric_limits<std::uint16_t>::max()
+        // (65535 bytes) is allowed over TCP
+        if (size < MIN_DNS_RESPONSE_SIZE) {
+            _malformed_data();
+            break;
+        }
+
+        if (_buffer.size() >= sizeof(size) + size) {
+            // copy the payload out and drop prefix + payload from the buffer
+            auto msg = std::make_unique<char[]>(size);
+            std::memcpy(msg.get(), _buffer.data() + sizeof(size), size);
+            _buffer.erase(0, sizeof(size) + size);
+            _got_dns_msg(std::move(msg), size);
+        } else {
+            // Nope, we need more data.
+            break;
+        }
+    }
+}
+
+// send data, giving data ownership to async library
+void TCPSession::write(std::unique_ptr<char[]> data, size_t len)
+{
+    _handle->write(std::move(data), len);
+}
diff --git a/libs/visor_http_client/tcpsession.h b/libs/visor_http_client/tcpsession.h
new file mode 100644
index 000000000..490e8adfd
--- /dev/null
+++ b/libs/visor_http_client/tcpsession.h
@@ -0,0 +1,40 @@
+#pragma once
+
+#include <functional>
+#include <memory>
+#include <string>
+
+namespace uvw {
+class tcp_handle;
+}
+
+// Session over a uvw TCP handle that extracts length-prefixed DNS messages from
+// the byte stream and reports them through callbacks.
+class TCPSession
+{
+public:
+    using malformed_data_cb = std::function<void()>;
+    using got_dns_msg_cb = std::function<void(std::unique_ptr<char[]> data, size_t size)>;
+    using connection_ready_cb = std::function<void()>;
+
+    TCPSession(std::shared_ptr<uvw::tcp_handle> handle,
+        malformed_data_cb malformed_data_handler,
+        got_dns_msg_cb got_dns_msg_handler,
+        connection_ready_cb connection_ready_handler);
+    virtual ~TCPSession() = default;
+
+    virtual bool setup();
+
+    virtual void on_connect_event();
+    virtual void on_end_event();
+    virtual void on_shutdown_event();
+
+    virtual void close();
+    virtual void receive_data(const char data[], size_t len);
+    virtual void write(std::unique_ptr<char[]> data, size_t len);
+
+private:
+    std::string _buffer; // bytes received but not yet split into messages
+    std::shared_ptr<uvw::tcp_handle> _handle;
+    malformed_data_cb _malformed_data;
+    got_dns_msg_cb _got_dns_msg;
+    connection_ready_cb _connection_ready;
+};
diff --git a/libs/visor_http_client/test_driver.cpp b/libs/visor_http_client/test_driver.cpp
new file mode
100644
index 000000000..79ad72440
--- /dev/null
+++ b/libs/visor_http_client/test_driver.cpp
@@ -0,0 +1,163 @@
+#include "uvw/util.h"
+#include <catch2/catch_test_macros.hpp>
+
+#include <arpa/inet.h>
+#include <deque>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#endif
+#include <uvw/dns.h>
+#include <uvw/tcp.h>
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+#include "httpssession.h"
+#include "target.h"
+
+// Wires the uvw TCP events of `tcp_handle` to `tcp_session`.
+// The handlers outlive this function, so they must not refer to its parameters:
+// the handle is reached through the reference uvw passes to each handler and
+// the session through a weak_ptr (which also avoids a handle <-> session
+// ownership cycle). The caller keeps the session alive while the loop runs.
+void connect_tcp_events(std::shared_ptr<uvw::tcp_handle> tcp_handle, std::shared_ptr<TCPSession> tcp_session)
+{
+    std::weak_ptr<TCPSession> weak_session{tcp_session};
+
+    /** SOCKET CALLBACKS **/
+
+    // SOCKET: local socket was closed, stop reading
+    tcp_handle->on<uvw::close_event>([](uvw::close_event &, uvw::tcp_handle &handle) {
+        handle.stop();
+    });
+
+    // SOCKET: socket error
+    tcp_handle->on<uvw::error_event>([](uvw::error_event &event, uvw::tcp_handle &handle) {
+        std::cout << "error_event: " << handle.sock().ip << ":" << handle.sock().port << " - " << event.what() << std::endl;
+        handle.close();
+    });
+
+    // INCOMING: remote peer closed connection, EOF
+    tcp_handle->on<uvw::end_event>([weak_session](uvw::end_event &, uvw::tcp_handle &) {
+        std::cout << "end_event" << std::endl;
+        if (auto session = weak_session.lock()) {
+            session->on_end_event();
+        }
+    });
+
+    // OUTGOING: we've finished writing all our data and are shutting down
+    tcp_handle->on<uvw::shutdown_event>([weak_session](uvw::shutdown_event &, uvw::tcp_handle &) {
+        std::cout << "shutdown_event" << std::endl;
+        if (auto session = weak_session.lock()) {
+            session->on_shutdown_event();
+        }
+    });
+
+    // INCOMING: remote peer sends data, pass to session
+    tcp_handle->on<uvw::data_event>([weak_session](uvw::data_event &event, uvw::tcp_handle &) {
+        std::cout << "data_event" << std::endl;
+        if (auto session = weak_session.lock()) {
+            session->receive_data(event.data.get(), event.length);
+        }
+    });
+
+    // OUTGOING: write operation has finished
+    tcp_handle->on<uvw::write_event>([](uvw::write_event &, uvw::tcp_handle &) {
+        std::cout << "WriteEvent" << std::endl;
+    });
+
+    // SOCKET: on connect
+    tcp_handle->on<uvw::connect_event>([weak_session](uvw::connect_event &, uvw::tcp_handle &handle) {
+        std::cout << "ConnectEvent" << std::endl;
+        if (auto session = weak_session.lock()) {
+            session->on_connect_event();
+        }
+
+        // start reading from incoming stream, fires data_event when receiving
+        handle.read();
+    });
+}
+
+// Connects to a public HTTPS endpoint, waits for the HTTP/2 SETTINGS exchange
+// and then closes the session. Requires DNS resolution and outbound network access.
+TEST_CASE("HTTP Client", "[http]")
+{
+    auto loop = uvw::loop::get_default();
+
+    auto family = AF_INET;
+
+    // Target keeps a raw pointer to the parsed URL, so the parsed structs need
+    // stable storage that outlives target_list (a deque never moves its elements
+    // on push_back).
+    std::deque<http_parser_url> parsed_urls;
+    std::vector<Target> target_list;
+    std::vector<std::string> raw_target_list;
+    raw_target_list.emplace_back("https://google.com");
+    auto request = loop->resource<uvw::get_addr_info_req>();
+    for (const auto &i : raw_target_list) {
+        std::string url = i;
+        if (url.rfind("https://", 0) != 0) {
+            url.insert(0, "https://");
+        }
+        parsed_urls.emplace_back(); // value-initialised, i.e. zeroed
+        http_parser_url &parsed = parsed_urls.back();
+        int ret = http_parser_parse_url(url.c_str(), url.size(), 0, &parsed);
+        if (ret != 0) {
+            std::cerr << "could not parse url: " << url << std::endl;
+        }
+        REQUIRE(ret == 0);
+        std::string authority(&url[parsed.field_data[UF_HOST].off], parsed.field_data[UF_HOST].len);
+
+        auto target_resolved = request->addr_info_sync(authority, "443");
+        if (!target_resolved.first) {
+            std::cerr << "unable to resolve target address: " << authority << std::endl;
+        }
+        REQUIRE(target_resolved.first);
+        addrinfo *node{target_resolved.second.get()};
+        while (node && node->ai_family != family) {
+            node = node->ai_next;
+        }
+        if (!node) {
+            std::cerr << "name did not resolve to valid IP address for this inet family: " << i
+                      << std::endl;
+        }
+        // stop here instead of dereferencing a null node below
+        REQUIRE(node != nullptr);
+
+        std::string ip;
+        if (family == AF_INET) {
+            char buffer[INET_ADDRSTRLEN];
+            inet_ntop(AF_INET, &reinterpret_cast<struct sockaddr_in *>(node->ai_addr)->sin_addr, buffer, INET_ADDRSTRLEN);
+            ip = buffer;
+        } else if (family == AF_INET6) {
+            char buffer[INET6_ADDRSTRLEN];
+            inet_ntop(AF_INET6, &reinterpret_cast<struct sockaddr_in6 *>(node->ai_addr)->sin6_addr, buffer, INET6_ADDRSTRLEN);
+            ip = buffer;
+        }
+        target_list.push_back({&parsed, ip, url});
+    }
+    REQUIRE_FALSE(target_list.empty());
+
+    // ---
+
+    std::shared_ptr<TCPSession> tcp_session;
+    auto tcp_handle = loop->resource<uvw::tcp_handle>(family);
+    bool connected = false;
+
+    auto malformed_data = [tcp_handle]() {
+        std::cout << "malformed_data" << std::endl;
+        tcp_handle->close();
+    };
+    auto got_dns_message = []([[maybe_unused]] std::unique_ptr<char[]> data,
+                               [[maybe_unused]] size_t size) {
+        std::cout << "got_dns_message" << std::endl;
+        // process_wire(data.get(), size);
+    };
+    // tcp_session is captured by reference: it is assigned below, before the
+    // loop runs, and outlives every invocation of this callback.
+    auto connection_ready = [&connected, &tcp_session]() {
+        /** SEND DATA **/
+        std::cout << "connection_ready" << std::endl;
+        connected = true;
+        // tcp_session->write(std::move(std::get<0>(qt)), std::get<1>(qt));
+        // nothing is sent, so close the session to let the loop finish
+        tcp_session->close();
+    };
+
+    tcp_session = std::make_shared<HTTPSSession>(tcp_handle, malformed_data, got_dns_message, connection_ready,
+        malformed_data, target_list[0], HTTPMethod::GET);
+    connect_tcp_events(tcp_handle, tcp_session);
+    const bool setup_ok = tcp_session->setup();
+    if (!setup_ok) {
+        std::cout << "setup failed" << std::endl;
+    }
+    REQUIRE(setup_ok);
+    std::cout << "connecting to " << target_list[0].address << ":" << 443 << std::endl;
+    tcp_handle->connect(target_list[0].address, 443);
+
+    // ----
+    loop->run();
+    loop = nullptr;
+
+    CHECK(connected);
+    std::cout << "Hello, World!" << std::endl;
+}
diff --git a/libs/visor_http_client/url_parser.c b/libs/visor_http_client/url_parser.c
new file mode 100644
index 000000000..4912ee206
--- /dev/null
+++ b/libs/visor_http_client/url_parser.c
@@ -0,0 +1,652 @@
+/* Copyright Joyent, Inc. and other Node contributors.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
/* BIT_AT(a, i): value (0 or 1) of bit `i` in the packed bitmap `a`, where
 * byte i/8 holds bits i..i+7, least significant bit first.
 */
#ifndef BIT_AT
# define BIT_AT(a, i)                                                \
  (!!((unsigned int) (a)[(unsigned int) (i) >> 3] &                  \
   (1 << ((unsigned int) (i) & 7))))
#endif

/* T(v) marks table bits that are only set in non-strict builds
 * (horizontal tab and form feed below).
 */
#if HTTP_PARSER_STRICT
# define T(v) 0
#else
# define T(v) v
#endif

/* Bitmap over the 7-bit ASCII range, queried through BIT_AT(): a set bit
 * means the character is accepted by IS_URL_CHAR(). Only the first 16 bytes
 * (codes 0..127) are spelled out; the remaining entries are zero.
 */
static const uint8_t normal_url_char[32] = {
/*   0 nul    1 soh    2 stx    3 etx    4 eot    5 enq    6 ack    7 bel  */
        0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
/*   8 bs     9 ht    10 nl    11 vt    12 np    13 cr    14 so    15 si   */
        0    | T(2)   |   0    |   0    | T(16)  |   0    |   0    |   0,
/*  16 dle   17 dc1   18 dc2   19 dc3   20 dc4   21 nak   22 syn   23 etb */
        0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
/*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us  */
        0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
/*  32 sp    33  !    34  "    35  #    36  $    37  %    38  &    39  '  */
        0    |   2    |   4    |   0    |   16   |   32   |   64   |  128,
/*  40  (    41  )    42  *    43  +    44  ,    45  -    46  .    47  /  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  48  0    49  1    50  2    51  3    52  4    53  5    54  6    55  7  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  56  8    57  9    58  :    59  ;    60  <    61  =    62  >    63  ?  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |   0,
/*  64  @    65  A    66  B    67  C    68  D    69  E    70  F    71  G  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  72  H    73  I    74  J    75  K    76  L    77  M    78  N    79  O  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  80  P    81  Q    82  R    83  S    84  T    85  U    86  V    87  W  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  88  X    89  Y    90  Z    91  [    92  \    93  ]    94  ^    95  _  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  96  `    97  a    98  b    99  c   100  d   101  e   102  f   103  g  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/* 104  h   105  i   106  j   107  k   108  l   109  m   110  n   111  o  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/* 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/* 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 del */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |   0, };

#undef T

/* Parser states. The URL functions in this file only ever produce s_dead and
 * the s_req_spaces_before_url .. s_req_fragment range; the full list is kept
 * so the numeric values stay as declared.
 */
enum state
  { s_dead = 1 /* important that this is > 0 */

  , s_start_req_or_res
  , s_res_or_resp_H
  , s_start_res
  , s_res_H
  , s_res_HT
  , s_res_HTT
  , s_res_HTTP
  , s_res_http_major
  , s_res_http_dot
  , s_res_http_minor
  , s_res_http_end
  , s_res_first_status_code
  , s_res_status_code
  , s_res_status_start
  , s_res_status
  , s_res_line_almost_done

  , s_start_req

  , s_req_method
  , s_req_spaces_before_url
  , s_req_schema
  , s_req_schema_slash
  , s_req_schema_slash_slash
  , s_req_server_start
  , s_req_server
  , s_req_server_with_at
  , s_req_path
  , s_req_query_string_start
  , s_req_query_string
  , s_req_fragment_start
  , s_req_fragment
  , s_req_http_start
  , s_req_http_H
  , s_req_http_HT
  , s_req_http_HTT
  , s_req_http_HTTP
  , s_req_http_I
  , s_req_http_IC
  , s_req_http_major
  , s_req_http_dot
  , s_req_http_minor
  , s_req_http_end
  , s_req_line_almost_done

  , s_header_field_start
  , s_header_field
  , s_header_value_discard_ws
  , s_header_value_discard_ws_almost_done
  , s_header_value_discard_lws
  , s_header_value_start
  , s_header_value
  , s_header_value_lws

  , s_header_almost_done

  , s_chunk_size_start
  , s_chunk_size
  , s_chunk_parameters
  , s_chunk_size_almost_done

  , s_headers_almost_done
  , s_headers_done

  /* Important: 's_headers_done' must be the last 'header' state. All
   * states beyond this must be 'body' states. It is used for overflow
   * checking. See the PARSING_HEADER() macro.
   */

  , s_chunk_data
  , s_chunk_data_almost_done
  , s_chunk_data_done

  , s_body_identity
  , s_body_identity_eof

  , s_message_done
  };

/* States of the authority ("userinfo@host:port") sub-parser. */
enum http_host_state
  {
    s_http_host_dead = 1
  , s_http_userinfo_start
  , s_http_userinfo
  , s_http_host_start
  , s_http_host_v6_start
  , s_http_host
  , s_http_host_v6
  , s_http_host_v6_end
  , s_http_host_v6_zone_start
  , s_http_host_v6_zone
  , s_http_host_port_start
  , s_http_host_port
};

/* Macros for character classes; depends on strict-mode.
 * Every macro parenthesizes its argument so that compound expressions
 * (e.g. a conditional) can be passed safely.
 */
#define CR                  '\r'
#define LF                  '\n'
#define LOWER(c)            ((unsigned char)((c) | 0x20))
#define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
#define IS_HEX(c)           (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
#define IS_MARK(c)          ((c) == '-' || (c) == '_' || (c) == '.' || \
  (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
  (c) == ')')
#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
  (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
  (c) == '$' || (c) == ',')

/* NOTE(review): STRICT_TOKEN/TOKEN index a `tokens` table that is not visible
 * in this file; they look like leftovers from the full HTTP parser and are
 * only safe as long as nothing expands them -- confirm before using.
 */
#define STRICT_TOKEN(c)     (((c) == ' ') ? 0 : tokens[(unsigned char)(c)])

#if HTTP_PARSER_STRICT
#define TOKEN(c)            STRICT_TOKEN(c)
#define IS_URL_CHAR(c)      (BIT_AT(normal_url_char, (unsigned char)(c)))
#define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
#else
#define TOKEN(c)            tokens[(unsigned char)(c)]
#define IS_URL_CHAR(c)                                                         \
  (BIT_AT(normal_url_char, (unsigned char)(c)) || ((c) & 0x80))
#define IS_HOST_CHAR(c)                                                        \
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
#endif
+ * + * This is designed to be shared by http_parser_execute() for URL validation, + * hence it has a state transition + byte-for-byte interface. In addition, it + * is meant to be embedded in http_parser_parse_url(), which does the dirty + * work of turning state transitions URL components for its API. + * + * This function should only be invoked with non-space characters. It is + * assumed that the caller cares about (and can detect) the transition between + * URL and non-URL states by looking for these. + */ +static enum state +parse_url_char(enum state s, const char ch) +{ + if (ch == ' ' || ch == '\r' || ch == '\n') { + return s_dead; + } + +#if HTTP_PARSER_STRICT + if (ch == '\t' || ch == '\f') { + return s_dead; + } +#endif + + switch (s) { + case s_req_spaces_before_url: + /* Proxied requests are followed by scheme of an absolute URI (alpha). + * All methods except CONNECT are followed by '/' or '*'. + */ + + if (ch == '/' || ch == '*') { + return s_req_path; + } + + if (IS_ALPHA(ch)) { + return s_req_schema; + } + + break; + + case s_req_schema: + if (IS_ALPHA(ch)) { + return s; + } + + if (ch == ':') { + return s_req_schema_slash; + } + + break; + + case s_req_schema_slash: + if (ch == '/') { + return s_req_schema_slash_slash; + } + + break; + + case s_req_schema_slash_slash: + if (ch == '/') { + return s_req_server_start; + } + + break; + + case s_req_server_with_at: + if (ch == '@') { + return s_dead; + } + + /* fall through */ + case s_req_server_start: + case s_req_server: + if (ch == '/') { + return s_req_path; + } + + if (ch == '?') { + return s_req_query_string_start; + } + + if (ch == '@') { + return s_req_server_with_at; + } + + if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') { + return s_req_server; + } + + break; + + case s_req_path: + if (IS_URL_CHAR(ch)) { + return s; + } + + switch (ch) { + case '?': + return s_req_query_string_start; + + case '#': + return s_req_fragment_start; + } + + break; + + case s_req_query_string_start: + case 
s_req_query_string: + if (IS_URL_CHAR(ch)) { + return s_req_query_string; + } + + switch (ch) { + case '?': + /* allow extra '?' in query string */ + return s_req_query_string; + + case '#': + return s_req_fragment_start; + } + + break; + + case s_req_fragment_start: + if (IS_URL_CHAR(ch)) { + return s_req_fragment; + } + + switch (ch) { + case '?': + return s_req_fragment; + + case '#': + return s; + } + + break; + + case s_req_fragment: + if (IS_URL_CHAR(ch)) { + return s; + } + + switch (ch) { + case '?': + case '#': + return s; + } + + break; + + default: + break; + } + + /* We should never fall out of the switch above unless there's an error */ + return s_dead; +} + +static enum http_host_state +http_parse_host_char(enum http_host_state s, const char ch) { + switch(s) { + case s_http_userinfo: + case s_http_userinfo_start: + if (ch == '@') { + return s_http_host_start; + } + + if (IS_USERINFO_CHAR(ch)) { + return s_http_userinfo; + } + break; + + case s_http_host_start: + if (ch == '[') { + return s_http_host_v6_start; + } + + if (IS_HOST_CHAR(ch)) { + return s_http_host; + } + + break; + + case s_http_host: + if (IS_HOST_CHAR(ch)) { + return s_http_host; + } + + /* fall through */ + case s_http_host_v6_end: + if (ch == ':') { + return s_http_host_port_start; + } + + break; + + case s_http_host_v6: + if (ch == ']') { + return s_http_host_v6_end; + } + + /* fall through */ + case s_http_host_v6_start: + if (IS_HEX(ch) || ch == ':' || ch == '.') { + return s_http_host_v6; + } + + if (s == s_http_host_v6 && ch == '%') { + return s_http_host_v6_zone_start; + } + break; + + case s_http_host_v6_zone: + if (ch == ']') { + return s_http_host_v6_end; + } + + /* fall through */ + case s_http_host_v6_zone_start: + /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */ + if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' 
|| ch == '-' || ch == '_' || + ch == '~') { + return s_http_host_v6_zone; + } + break; + + case s_http_host_port: + case s_http_host_port_start: + if (IS_NUM(ch)) { + return s_http_host_port; + } + + break; + + default: + break; + } + return s_http_host_dead; +} + +static int +http_parse_host(const char * buf, struct http_parser_url *u, int found_at) { + enum http_host_state s; + + const char *p; + size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len; + + assert(u->field_set & (1 << UF_HOST)); + + u->field_data[UF_HOST].len = 0; + + s = found_at ? s_http_userinfo_start : s_http_host_start; + + for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) { + enum http_host_state new_s = http_parse_host_char(s, *p); + + if (new_s == s_http_host_dead) { + return 1; + } + + switch(new_s) { + case s_http_host: + if (s != s_http_host) { + u->field_data[UF_HOST].off = (uint16_t)(p - buf); + } + u->field_data[UF_HOST].len++; + break; + + case s_http_host_v6: + if (s != s_http_host_v6) { + u->field_data[UF_HOST].off = (uint16_t)(p - buf); + } + u->field_data[UF_HOST].len++; + break; + + case s_http_host_v6_zone_start: + case s_http_host_v6_zone: + u->field_data[UF_HOST].len++; + break; + + case s_http_host_port: + if (s != s_http_host_port) { + u->field_data[UF_PORT].off = (uint16_t)(p - buf); + u->field_data[UF_PORT].len = 0; + u->field_set |= (1 << UF_PORT); + } + u->field_data[UF_PORT].len++; + break; + + case s_http_userinfo: + if (s != s_http_userinfo) { + u->field_data[UF_USERINFO].off = (uint16_t)(p - buf); + u->field_data[UF_USERINFO].len = 0; + u->field_set |= (1 << UF_USERINFO); + } + u->field_data[UF_USERINFO].len++; + break; + + default: + break; + } + s = new_s; + } + + /* Make sure we don't end somewhere unexpected */ + switch (s) { + case s_http_host_start: + case s_http_host_v6_start: + case s_http_host_v6: + case s_http_host_v6_zone_start: + case s_http_host_v6_zone: + case s_http_host_port_start: + case s_http_userinfo: + case 
s_http_userinfo_start: + return 1; + default: + break; + } + + return 0; +} + +void +http_parser_url_init(struct http_parser_url *u) { + memset(u, 0, sizeof(*u)); +} + +int +http_parser_parse_url(const char *buf, size_t buflen, int is_connect, + struct http_parser_url *u) +{ + enum state s; + const char *p; + enum http_parser_url_fields uf, old_uf; + int found_at = 0; + + if (buflen == 0) { + return 1; + } + + u->port = u->field_set = 0; + s = is_connect ? s_req_server_start : s_req_spaces_before_url; + old_uf = UF_MAX; + + for (p = buf; p < buf + buflen; p++) { + s = parse_url_char(s, *p); + + /* Figure out the next field that we're operating on */ + switch (s) { + case s_dead: + return 1; + + /* Skip delimeters */ + case s_req_schema_slash: + case s_req_schema_slash_slash: + case s_req_server_start: + case s_req_query_string_start: + case s_req_fragment_start: + continue; + + case s_req_schema: + uf = UF_SCHEMA; + break; + + case s_req_server_with_at: + found_at = 1; + + /* fall through */ + case s_req_server: + uf = UF_HOST; + break; + + case s_req_path: + uf = UF_PATH; + break; + + case s_req_query_string: + uf = UF_QUERY; + break; + + case s_req_fragment: + uf = UF_FRAGMENT; + break; + + default: + assert(!"Unexpected state"); + return 1; + } + + /* Nothing's changed; soldier on */ + if (uf == old_uf) { + u->field_data[uf].len++; + continue; + } + + u->field_data[uf].off = (uint16_t)(p - buf); + u->field_data[uf].len = 1; + + u->field_set |= (1 << uf); + old_uf = uf; + } + + /* host must be present if there is a schema */ + /* parsing http:///toto will fail */ + if ((u->field_set & (1 << UF_SCHEMA)) && + (u->field_set & (1 << UF_HOST)) == 0) { + return 1; + } + + if (u->field_set & (1 << UF_HOST)) { + if (http_parse_host(buf, u, found_at) != 0) { + return 1; + } + } + + /* CONNECT requests can only contain "hostname:port" */ + if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) { + return 1; + } + + if (u->field_set & (1 << UF_PORT)) { + 
uint16_t off; + uint16_t len; + const char* p; + const char* end; + unsigned long v; + + off = u->field_data[UF_PORT].off; + len = u->field_data[UF_PORT].len; + end = buf + off + len; + + /* NOTE: The characters are already validated and are in the [0-9] range */ + assert(off + len <= buflen && "Port number overflow"); + v = 0; + for (p = buf + off; p < end; p++) { + v *= 10; + v += *p - '0'; + + /* Ports have a max value of 2^16 */ + if (v > 0xffff) { + return 1; + } + } + + u->port = (uint16_t) v; + } + + return 0; +} diff --git a/libs/visor_http_client/url_parser.h b/libs/visor_http_client/url_parser.h new file mode 100644 index 000000000..78b3096c5 --- /dev/null +++ b/libs/visor_http_client/url_parser.h @@ -0,0 +1,94 @@ +/* Copyright Joyent, Inc. and other Node contributors. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ +#ifndef url_parser_h +#define url_parser_h +#ifdef __cplusplus +extern "C" { +#endif + +/* Also update SONAME in the Makefile whenever you change these. */ +#define HTTP_PARSER_VERSION_MAJOR 2 +#define HTTP_PARSER_VERSION_MINOR 9 +#define HTTP_PARSER_VERSION_PATCH 1 + +#include +#if defined(_WIN32) && !defined(__MINGW32__) && \ + (!defined(_MSC_VER) || _MSC_VER<1600) && !defined(__WINE__) +#include +typedef __int8 int8_t; +typedef unsigned __int8 uint8_t; +typedef __int16 int16_t; +typedef unsigned __int16 uint16_t; +typedef __int32 int32_t; +typedef unsigned __int32 uint32_t; +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; +#else +#include +#endif + +/* Compile with -DHTTP_PARSER_STRICT=0 to make less checks, but run + * faster + */ +#ifndef HTTP_PARSER_STRICT +# define HTTP_PARSER_STRICT 1 +#endif + +enum http_parser_url_fields + { UF_SCHEMA = 0 + , UF_HOST = 1 + , UF_PORT = 2 + , UF_PATH = 3 + , UF_QUERY = 4 + , UF_FRAGMENT = 5 + , UF_USERINFO = 6 + , UF_MAX = 7 + }; + + +/* Result structure for http_parser_parse_url(). + * + * Callers should index into field_data[] with UF_* values iff field_set + * has the relevant (1 << UF_*) bit set. As a courtesy to clients (and + * because we probably have padding left over), we convert any port to + * a uint16_t. + */ +struct http_parser_url { + uint16_t field_set; /* Bitmask of (1 << UF_*) values */ + uint16_t port; /* Converted UF_PORT string */ + + struct { + uint16_t off; /* Offset into buffer in which field starts */ + uint16_t len; /* Length of run in buffer */ + } field_data[UF_MAX]; +}; + +/* Initialize all http_parser_url members to 0 */ +void http_parser_url_init(struct http_parser_url *u); + +/* Parse a URL; return nonzero on failure */ +int http_parser_parse_url(const char *buf, size_t buflen, + int is_connect, + struct http_parser_url *u); +#ifdef __cplusplus +} +#endif +#endif