From e353a1db208988366a6395b4bb1be3fee73bd959 Mon Sep 17 00:00:00 2001 From: James M Snell Date: Wed, 21 Aug 2013 14:39:54 -0700 Subject: [PATCH 1/2] Support for unknown extension methods in the http parser.. implemented in a way that does not slow the parser down. --- .gitignore | 4 ++++ http_parser.c | 59 +++++++++++++++++++++++++++++---------------------- http_parser.h | 3 +++ test.c | 20 +++++++++++++++++ 4 files changed, 61 insertions(+), 25 deletions(-) diff --git a/.gitignore b/.gitignore index 5d12d158..0a41cbd1 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,7 @@ parsertrace_g *.Makefile *.so.* *.a + +.cproject + +.project diff --git a/http_parser.c b/http_parser.c index c87186f3..6899b7aa 100644 --- a/http_parser.c +++ b/http_parser.c @@ -81,14 +81,14 @@ do { \ #define CALLBACK_NOTIFY_NOADVANCE(FOR) CALLBACK_NOTIFY_(FOR, p - data) /* Run data callback FOR with LEN bytes, returning ER if it fails */ -#define CALLBACK_DATA_(FOR, LEN, ER) \ +#define CALLBACK_DATA_(FOR, LEN, ER, ERR) \ do { \ assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \ \ if (FOR##_mark) { \ if (settings->on_##FOR) { \ if (0 != settings->on_##FOR(parser, FOR##_mark, (LEN))) { \ - SET_ERRNO(HPE_CB_##FOR); \ + SET_ERRNO(ERR); \ } \ \ /* We either errored above or got paused; get out */ \ @@ -102,11 +102,11 @@ do { \ /* Run the data callback FOR and consume the current byte */ #define CALLBACK_DATA(FOR) \ - CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1) + CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1, HPE_CB_##FOR) /* Run the data callback FOR and don't consume the current byte */ #define CALLBACK_DATA_NOADVANCE(FOR) \ - CALLBACK_DATA_(FOR, p - FOR##_mark, p - data) + CALLBACK_DATA_(FOR, p - FOR##_mark, p - data, HPE_CB_##FOR) /* Set the mark FOR; non-destructive if mark is already set */ #define MARK(FOR) \ @@ -579,6 +579,7 @@ size_t http_parser_execute (http_parser *parser, const char *p = data; const char *header_field_mark = 0; const char *header_value_mark = 0; + const char *unknown_method_mark = 0; const char *url_mark = 0; const char *body_mark = 0; @@ -613,6 +614,8 @@ size_t http_parser_execute (http_parser *parser, header_field_mark = data; if (parser->state == s_header_value) header_value_mark = data; + if (parser->state == s_start_req) + unknown_method_mark = data; switch (parser->state) { case s_req_path: case s_req_schema: @@ -900,8 +903,7 @@ size_t http_parser_execute (http_parser *parser, case 'T': parser->method = HTTP_TRACE; break; case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break; default: - SET_ERRNO(HPE_INVALID_METHOD); - goto error; + parser->method = HTTP_unknown; } parser->state = s_req_method; @@ -917,10 +919,12 @@ size_t http_parser_execute (http_parser *parser, SET_ERRNO(HPE_INVALID_METHOD); goto error; } - matcher = method_strings[parser->method]; - if (ch == ' ' && matcher[parser->index] == '\0') { + + if (ch == ' ' && (parser->method == HTTP_unknown || matcher[parser->index] == '\0')) { parser->state = s_req_spaces_before_url; + } else if (parser->method == HTTP_unknown) { + ; /* nada */ } else if (ch == matcher[parser->index]) { ; /* nada */ } else if (parser->method == HTTP_CONNECT) { @@ -929,8 +933,7 @@ size_t http_parser_execute (http_parser *parser, } else if (parser->index == 2 && ch == 'P') { parser->method = HTTP_COPY; } else { - SET_ERRNO(HPE_INVALID_METHOD); - goto error; + parser->method = HTTP_unknown; } } else if (parser->method == HTTP_MKCOL) { if (parser->index == 1 && ch == 'O') { @@ -942,15 +945,13 @@ size_t http_parser_execute (http_parser *parser, } else if (parser->index == 2 && ch == 'A') { parser->method = HTTP_MKACTIVITY; } else { - SET_ERRNO(HPE_INVALID_METHOD); - goto error; + parser->method = HTTP_unknown; } } else if (parser->method == HTTP_SUBSCRIBE) { if (parser->index == 1 && ch == 'E') { parser->method = HTTP_SEARCH; } else { - SET_ERRNO(HPE_INVALID_METHOD); - goto error; + parser->method = HTTP_unknown; } } else if (parser->index == 1 && parser->method == HTTP_POST) { if (ch == 'R') { @@ -960,33 +961,28 @@ size_t http_parser_execute (http_parser *parser, } else if (ch == 'A') { parser->method = HTTP_PATCH; } else { - SET_ERRNO(HPE_INVALID_METHOD); - goto error; + parser->method = HTTP_unknown; } } else if (parser->index == 2) { if (parser->method == HTTP_PUT) { if (ch == 'R') { parser->method = HTTP_PURGE; } else { - SET_ERRNO(HPE_INVALID_METHOD); - goto error; + parser->method = HTTP_unknown; } } else if (parser->method == HTTP_UNLOCK) { if (ch == 'S') { parser->method = HTTP_UNSUBSCRIBE; } else { - SET_ERRNO(HPE_INVALID_METHOD); - goto error; + parser->method = HTTP_unknown; } } else { - SET_ERRNO(HPE_INVALID_METHOD); - goto error; + parser->method = HTTP_unknown; } } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') { parser->method = HTTP_PROPPATCH; } else { - SET_ERRNO(HPE_INVALID_METHOD); - goto error; + parser->method = HTTP_unknown; } ++parser->index; @@ -995,6 +991,19 @@ size_t http_parser_execute (http_parser *parser, case s_req_spaces_before_url: { + // Check if the method is unknown, if it is, send it to the + // unknown method callback handler (if set). If that callback + // returns != 0, specify an HPE_INVALID_METHOD error. Otherwise, + // just accept it and keep going. + if (parser->method == HTTP_unknown) { + if (settings->on_unknown_method) { + CALLBACK_DATA_(unknown_method, p - unknown_method_mark - 1, p - data, HPE_INVALID_METHOD); + } else { + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } + } + if (ch == ' ') break; MARK(url); @@ -1687,7 +1696,7 @@ size_t http_parser_execute (http_parser *parser, * complete-on-length. It's not clear that this distinction is * important for applications, but let's keep it for now. */ - CALLBACK_DATA_(body, p - body_mark + 1, p - data); + CALLBACK_DATA_(body, p - body_mark + 1, p - data, HPE_CB_body); goto reexecute_byte; } diff --git a/http_parser.h b/http_parser.h index 98c0905b..391aaf9d 100644 --- a/http_parser.h +++ b/http_parser.h @@ -110,6 +110,7 @@ typedef int (*http_cb) (http_parser*); /* RFC-5789 */ \ XX(24, PATCH, PATCH) \ XX(25, PURGE, PURGE) \ + XX(99, unknown, unknown) \ enum http_method { @@ -150,6 +151,7 @@ enum flags XX(CB_headers_complete, "the on_headers_complete callback failed") \ XX(CB_body, "the on_body callback failed") \ XX(CB_message_complete, "the on_message_complete callback failed") \ + XX(CB_unknown_method, "the on_unknown_method callback failed") \ \ /* Parsing-related errors */ \ XX(INVALID_EOF_STATE, "stream ended at an unexpected time") \ @@ -230,6 +232,7 @@ struct http_parser_settings { http_cb on_headers_complete; http_data_cb on_body; http_cb on_message_complete; + http_data_cb on_unknown_method; }; diff --git a/test.c b/test.c index b9a5ac38..6828a42d 100644 --- a/test.c +++ b/test.c @@ -44,6 +44,7 @@ struct message { enum http_parser_type type; enum http_method method; int status_code; + char method_string[MAX_ELEMENT_SIZE]; char request_path[MAX_ELEMENT_SIZE]; char request_url[MAX_ELEMENT_SIZE]; char fragment[MAX_ELEMENT_SIZE]; @@ -1498,6 +1499,20 @@ status_complete_cb (http_parser *p) { return 0; } +int +unknown_method_cb (http_parser *p, const char *buf, size_t len) +{ + assert(p == parser); + strlncat(messages[num_messages].method_string, + sizeof(messages[num_messages].method_string), + buf, + len); + // fprintf(stderr, messages[num_messages].method_string + // return 0 if the unknown method is handled by the callback, + // otherwise return -1 to cause an HPE_INVALID_METHOD + return -1; +} + int header_field_cb (http_parser *p, const char *buf, size_t len) { @@ -1761,6 +1776,7 @@ static http_parser_settings settings_pause = ,.on_body = pause_body_cb ,.on_headers_complete = pause_headers_complete_cb ,.on_message_complete = pause_message_complete_cb + ,.on_unknown_method = unknown_method_cb }; static http_parser_settings settings = @@ -1771,6 +1787,7 @@ static http_parser_settings settings = ,.on_body = body_cb ,.on_headers_complete = headers_complete_cb ,.on_message_complete = message_complete_cb + ,.on_unknown_method = unknown_method_cb }; static http_parser_settings settings_count_body = @@ -1781,6 +1798,7 @@ static http_parser_settings settings_count_body = ,.on_body = count_body_cb ,.on_headers_complete = headers_complete_cb ,.on_message_complete = message_complete_cb + ,.on_unknown_method = unknown_method_cb }; static http_parser_settings settings_null = @@ -1791,6 +1809,7 @@ static http_parser_settings settings_null = ,.on_body = 0 ,.on_headers_complete = 0 ,.on_message_complete = 0 + ,.on_unknown_method = 0 }; void @@ -3330,6 +3349,7 @@ main (void) "SA", "hello world", 0 }; + for (this_method = bad_methods; *this_method; this_method++) { char buf[200]; sprintf(buf, "%s / HTTP/1.1\r\n\r\n", *this_method); From 41d5c469be36fe8ca4e8aa55cc400918cee392fe Mon Sep 17 00:00:00 2001 From: James M Snell Date: Wed, 21 Aug 2013 15:17:19 -0700 Subject: [PATCH 2/2] let's make sure all our method characters are valid in a token. --- http_parser.c | 8 +++++++- test.c | 28 ++++++++++++++++++++++------ 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/http_parser.c b/http_parser.c index 6899b7aa..d8dd0125 100644 --- a/http_parser.c +++ b/http_parser.c @@ -879,7 +879,7 @@ size_t http_parser_execute (http_parser *parser, parser->flags = 0; parser->content_length = ULLONG_MAX; - if (!IS_ALPHA(ch)) { + if (!TOKEN(ch)) { SET_ERRNO(HPE_INVALID_METHOD); goto error; } @@ -919,8 +919,14 @@ size_t http_parser_execute (http_parser *parser, SET_ERRNO(HPE_INVALID_METHOD); goto error; } + matcher = method_strings[parser->method]; + if (!TOKEN(ch) && ch != ' ') { + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } + if (ch == ' ' && (parser->method == HTTP_unknown || matcher[parser->index] == '\0')) { parser->state = s_req_spaces_before_url; } else if (parser->method == HTTP_unknown) { diff --git a/test.c b/test.c index 6828a42d..5e24be67 100644 --- a/test.c +++ b/test.c @@ -1510,7 +1510,7 @@ unknown_method_cb (http_parser *p, const char *buf, size_t len) // fprintf(stderr, messages[num_messages].method_string // return 0 if the unknown method is handled by the callback, // otherwise return -1 to cause an HPE_INVALID_METHOD - return -1; + return 0; } int @@ -3335,7 +3335,9 @@ main (void) test_simple(buf, HPE_OK); } - static const char *bad_methods[] = { + static const char *ext_methods[] = { + "LINK", + "UNLINK", "ASDF", "C******", "COLA", @@ -3347,13 +3349,27 @@ main (void) "PUN", "PX", "SA", - "hello world", - 0 }; + "+1", + 0 + }; - for (this_method = bad_methods; *this_method; this_method++) { + for (this_method = ext_methods; *this_method; this_method++) { char buf[200]; sprintf(buf, "%s / HTTP/1.1\r\n\r\n", *this_method); - test_simple(buf, HPE_INVALID_METHOD); + test_simple(buf, HPE_OK); + } + + static const char *bad_methods[] = { + "JO]", + "GE]", + "[GET]", + 0 + }; + + for (this_method = bad_methods; *this_method; this_method++) { + char buf[200]; + sprintf(buf, "%s / HTTP/1.1\r\n\r\n", *this_method); + test_simple(buf, HPE_INVALID_METHOD); } const char *dumbfuck2 =