diff --git a/contrib/parsertrace.c b/contrib/parsertrace.c index c9bc71ec..9fcdb185 100644 --- a/contrib/parsertrace.c +++ b/contrib/parsertrace.c @@ -22,78 +22,119 @@ * IN THE SOFTWARE. */ -/* Dump what the parser finds to stdout as it happen */ +/* Dump what the parser finds to stderr as it happen, body go to stdout */ +#define _GNU_SOURCE 1 #include "http_parser.h" #include #include +#include +#include #include +#include +#include +#include +#include +#include +#include + +struct timeval start; +unsigned long long timestamp() { + struct timeval stop; + gettimeofday(&stop, NULL); + return (stop.tv_sec - start.tv_sec)*1000 + (stop.tv_usec - start.tv_usec)/1000; +} + +int log_event(const char *fmt, ...) { + va_list ap; + char *nfmt; + va_start(ap, fmt); + if (asprintf(&nfmt, "%s %6llu ms: %s %s\n", + isatty(STDERR_FILENO)?"\033[1;33m**":"\n**", timestamp(), + fmt, isatty(STDERR_FILENO)?"\033[0m":"**\n") == -1) { + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n"); + } else { + vfprintf(stderr, nfmt, ap); + free(nfmt); + } + fflush(stderr); + return 0; +} int on_message_begin(http_parser* _) { (void)_; - printf("\n***MESSAGE BEGIN***\n\n"); - return 0; + return log_event("MESSAGE BEGIN"); } int on_headers_complete(http_parser* _) { (void)_; - printf("\n***HEADERS COMPLETE***\n\n"); - return 0; + return log_event("HEADERS COMPLETE"); } int on_message_complete(http_parser* _) { (void)_; - printf("\n***MESSAGE COMPLETE***\n\n"); - return 0; + return log_event("MESSAGE COMPLETE"); +} + +int on_chunk_begin(http_parser* _) { + (void)_; + return log_event("CHUNK BEGIN"); +} + +int on_chunk_complete(http_parser* _) { + (void)_; + return log_event("CHUNK COMPLETE"); } int on_url(http_parser* _, const char* at, size_t length) { (void)_; - printf("Url: %.*s\n", (int)length, at); - return 0; + return log_event("URL: %.*s", (int)length, at); } int on_header_field(http_parser* _, const char* at, size_t length) { (void)_; - printf("Header field: %.*s\n", (int)length, at); - return 0; + return log_event("Header field: %.*s", (int)length, at); } int on_header_value(http_parser* _, const char* at, size_t length) { (void)_; - printf("Header value: %.*s\n", (int)length, at); - return 0; + return log_event("Header value: %.*s", (int)length, at); } int on_body(http_parser* _, const char* at, size_t length) { (void)_; - printf("Body: %.*s\n", (int)length, at); - return 0; + log_event(" (size=%zu)", length); + printf("%.*s", (int)length, at); + return log_event(""); } void usage(const char* name) { fprintf(stderr, "Usage: %s $type $filename\n" + " %s $url\n" " type: -x, where x is one of {r,b,q}\n" " parses file as a Response, reQuest, or Both\n", - name); + name, name); exit(EXIT_FAILURE); } int main(int argc, char* argv[]) { - enum http_parser_type file_type; + char data[2 << 16]; + enum http_parser_type file_type = HTTP_RESPONSE; - if (argc != 3) { + if (argc != 3 && argc != 2) { usage(argv[0]); } - char* type = argv[1]; - if (type[0] != '-') { - usage(argv[0]); - } + if (argc == 3) { + char* type = argv[1]; + if (type[0] != '-') { + usage(argv[0]); + } - switch (type[1]) { - /* in the case of "-", type[1] will be NUL */ + switch (type[1]) { + /* in the case of "-", type[1] will be NUL */ case 'r': file_type = HTTP_RESPONSE; break; @@ -105,28 +146,81 @@ int main(int argc, char* argv[]) { break; default: usage(argv[0]); + } } - char* filename = argv[2]; - FILE* file = fopen(filename, "r"); - if (file == NULL) { - perror("fopen"); - return EXIT_FAILURE; - } + int file = -1; - fseek(file, 0, SEEK_END); - long file_length = ftell(file); - if (file_length == -1) { - perror("ftell"); - return EXIT_FAILURE; - } - fseek(file, 0, SEEK_SET); + if (argc == 3) { + char* filename = argv[2]; + file = (strcmp(filename, "-"))?open(filename, O_RDONLY):STDIN_FILENO; + if (file == -1) { + perror("open"); + return EXIT_FAILURE; + } + } else { + char* url = argv[1]; + struct http_parser_url u; + if (http_parser_parse_url(url, strlen(url), 0, &u) != 0) { + fprintf(stderr, "Unable to parse %s\n", url); + return EXIT_FAILURE; + } + if ((u.field_set & (1 << UF_SCHEMA)) == 0 || + (u.field_set & (1 << UF_HOST)) == 0 || + (u.field_set & (1 << UF_PATH)) == 0 || + u.field_data[UF_SCHEMA].len != 4 || + strncmp(url + u.field_data[UF_SCHEMA].off, "http", 4)) { + fprintf(stderr, "Absolute HTTP URL expected\n"); + return EXIT_FAILURE; + } - char* data = malloc(file_length); - if (fread(data, 1, file_length, file) != (size_t)file_length) { - fprintf(stderr, "couldn't read entire file\n"); - free(data); - return EXIT_FAILURE; + int n; + char *remote; char *port; + remote = strndup(url + u.field_data[UF_HOST].off, u.field_data[UF_HOST].len); + port = u.port?strndup(url + u.field_data[UF_PORT].off, u.field_data[UF_PORT].len):strdup("http"); + + struct addrinfo *res, *ressave; + struct addrinfo hints = { + .ai_family = AF_UNSPEC, + .ai_socktype = SOCK_STREAM, + .ai_protocol = IPPROTO_TCP + }; + if ((n = getaddrinfo(remote, port, &hints, &res)) != 0) { + fprintf(stderr, "unable to get address for %s:%s: %s", + remote, port, gai_strerror(n)); + free(remote); free(port); + return EXIT_FAILURE; + } + ressave = res; + do { + int s = socket(res->ai_family, res->ai_socktype, res->ai_protocol); + if (s < 0) continue; + if (connect(s, res->ai_addr, res->ai_addrlen) != 0) { + close(s); + continue; + } + file = s; + break; + } while ((res = res->ai_next) != NULL); + freeaddrinfo(ressave); + + if (file == -1) { + fprintf(stderr, "unable to connect to %s:%s", + remote, port); + free(remote); free(port); + return EXIT_FAILURE; + } + + struct iovec stuff[] = { + {"GET ", 4}, + {url + u.field_data[UF_PATH].off, + u.field_data[UF_PATH].len + ((u.field_set & (1 << UF_QUERY))?(1 + u.field_data[UF_QUERY].len):0) }, + {" HTTP/1.1\r\nHost: ", 17}, + {url + u.field_data[UF_HOST].off, u.field_data[UF_HOST].len}, + {"\r\n\r\n", 4} + }; + writev(file, stuff, sizeof(stuff)/sizeof(stuff[0])); + shutdown(file, SHUT_WR); } http_parser_settings settings; @@ -136,21 +230,28 @@ int main(int argc, char* argv[]) { settings.on_header_field = on_header_field; settings.on_header_value = on_header_value; settings.on_headers_complete = on_headers_complete; + settings.on_chunk_begin = on_chunk_begin; settings.on_body = on_body; + settings.on_chunk_complete = on_chunk_complete; settings.on_message_complete = on_message_complete; http_parser parser; http_parser_init(&parser, file_type); - size_t nparsed = http_parser_execute(&parser, &settings, data, file_length); - free(data); - - if (nparsed != (size_t)file_length) { - fprintf(stderr, - "Error: %s (%s)\n", - http_errno_description(HTTP_PARSER_ERRNO(&parser)), - http_errno_name(HTTP_PARSER_ERRNO(&parser))); - return EXIT_FAILURE; + gettimeofday(&start, NULL); + + size_t len; + while ((len = read(file, data, sizeof(data))) > 0) { + size_t nparsed = http_parser_execute(&parser, &settings, data, len); + if (nparsed != len) { + fprintf(stderr, + "Error: %s (%s)\n", + http_errno_description(HTTP_PARSER_ERRNO(&parser)), + http_errno_name(HTTP_PARSER_ERRNO(&parser))); + if (file && file != STDIN_FILENO) close(file); + return EXIT_FAILURE; + } } + if (file && file != STDIN_FILENO) close(file); return EXIT_SUCCESS; } diff --git a/http_parser.c b/http_parser.c index c87186f3..092a3d4a 100644 --- a/http_parser.c +++ b/http_parser.c @@ -1782,6 +1782,7 @@ size_t http_parser_execute (http_parser *parser, parser->state = s_header_field_start; } else { parser->state = s_chunk_data; + CALLBACK_NOTIFY(chunk_begin); } break; } @@ -1822,6 +1823,7 @@ size_t http_parser_execute (http_parser *parser, STRICT_CHECK(ch != LF); parser->nread = 0; parser->state = s_chunk_size_start; + CALLBACK_NOTIFY(chunk_complete); break; default: diff --git a/http_parser.h b/http_parser.h index 98c0905b..849fc269 100644 --- a/http_parser.h +++ b/http_parser.h @@ -148,7 +148,9 @@ enum flags XX(CB_header_field, "the on_header_field callback failed") \ XX(CB_header_value, "the on_header_value callback failed") \ XX(CB_headers_complete, "the on_headers_complete callback failed") \ + XX(CB_chunk_begin, "the on_chunk_begin callback failed") \ XX(CB_body, "the on_body callback failed") \ + XX(CB_chunk_complete, "the on_chunk_complete callback failed") \ XX(CB_message_complete, "the on_message_complete callback failed") \ \ /* Parsing-related errors */ \ @@ -228,7 +230,9 @@ struct http_parser_settings { http_data_cb on_header_field; http_data_cb on_header_value; http_cb on_headers_complete; + http_cb on_chunk_begin; http_data_cb on_body; + http_cb on_chunk_complete; http_cb on_message_complete; }; diff --git a/test.c b/test.c index b9a5ac38..088f4adb 100644 --- a/test.c +++ b/test.c @@ -54,6 +54,8 @@ struct message { const char *userinfo; uint16_t port; int num_headers; + int num_chunks; + int num_end_chunks; enum { NONE=0, FIELD, VALUE } last_header_element; char headers [MAX_HEADERS][2][MAX_ELEMENT_SIZE]; int should_keep_alive; @@ -290,6 +292,7 @@ const struct message requests[] = ,.request_path= "/post_chunked_all_your_base" ,.request_url= "/post_chunked_all_your_base" ,.num_headers= 1 + ,.num_chunks= 1 ,.headers= { { "Transfer-Encoding" , "chunked" } } @@ -316,6 +319,7 @@ const struct message requests[] = ,.request_path= "/two_chunks_mult_zero_end" ,.request_url= "/two_chunks_mult_zero_end" ,.num_headers= 1 + ,.num_chunks= 2 ,.headers= { { "Transfer-Encoding", "chunked" } } @@ -344,6 +348,7 @@ const struct message requests[] = ,.request_path= "/chunked_w_trailing_headers" ,.request_url= "/chunked_w_trailing_headers" ,.num_headers= 3 + ,.num_chunks= 2 ,.headers= { { "Transfer-Encoding", "chunked" } , { "Vary", "*" } @@ -372,6 +377,7 @@ const struct message requests[] = ,.request_path= "/chunked_w_bullshit_after_length" ,.request_url= "/chunked_w_bullshit_after_length" ,.num_headers= 1 + ,.num_chunks= 2 ,.headers= { { "Transfer-Encoding", "chunked" } } @@ -1044,6 +1050,7 @@ const struct message responses[] = ,.http_minor= 1 ,.status_code= 200 ,.num_headers= 2 + ,.num_chunks= 2 ,.headers= { {"Content-Type", "text/plain" } , {"Transfer-Encoding", "chunked" } @@ -1187,6 +1194,7 @@ const struct message responses[] = ,.http_minor= 1 ,.status_code= 200 ,.num_headers= 11 + ,.num_chunks= 0 ,.headers= { { "Date", "Tue, 28 Sep 2010 01:14:13 GMT" } , { "Server", "Apache" } @@ -1369,6 +1377,7 @@ const struct message responses[] = ,.http_minor= 1 ,.status_code= 200 ,.num_headers= 1 + ,.num_chunks= 0 ,.headers= { { "Transfer-Encoding", "chunked" } } @@ -1623,6 +1632,22 @@ message_complete_cb (http_parser *p) return 0; } +int +chunk_begin_cb (http_parser *p) +{ + assert(p == parser); + messages[num_messages].num_chunks++; + return 0; +} + +int +chunk_complete_cb (http_parser *p) +{ + assert(p == parser); + messages[num_messages].num_end_chunks++; + return 0; +} + /* These dontcall_* callbacks exist so that we can verify that when we're * paused, no additional callbacks are invoked */ int @@ -1683,6 +1708,24 @@ dontcall_message_complete_cb (http_parser *p) abort(); } +int +dontcall_chunk_begin_cb (http_parser *p) +{ + if (p) { } // gcc + fprintf(stderr, "\n\n*** on_chunk_begin() called on paused " + "parser ***\n\n"); + abort(); +} + +int +dontcall_chunk_complete_cb (http_parser *p) +{ + if (p) { } // gcc + fprintf(stderr, "\n\n*** on_chunk_complete() called on paused " + "parser ***\n\n"); + abort(); +} + static http_parser_settings settings_dontcall = {.on_message_begin = dontcall_message_begin_cb ,.on_header_field = dontcall_header_field_cb @@ -1691,6 +1734,8 @@ static http_parser_settings settings_dontcall = ,.on_body = dontcall_body_cb ,.on_headers_complete = dontcall_headers_complete_cb ,.on_message_complete = dontcall_message_complete_cb + ,.on_chunk_begin = dontcall_chunk_begin_cb + ,.on_chunk_complete = dontcall_chunk_complete_cb }; /* These pause_* callbacks always pause the parser and just invoke the regular @@ -1753,6 +1798,22 @@ pause_message_complete_cb (http_parser *p) return message_complete_cb(p); } +int +pause_chunk_begin_cb (http_parser *p) +{ + http_parser_pause(p, 1); + *current_pause_parser = settings_dontcall; + return chunk_begin_cb(p); +} + +int +pause_chunk_complete_cb (http_parser *p) +{ + http_parser_pause(p, 1); + *current_pause_parser = settings_dontcall; + return chunk_complete_cb(p); +} + static http_parser_settings settings_pause = {.on_message_begin = pause_message_begin_cb ,.on_header_field = pause_header_field_cb @@ -1761,6 +1822,8 @@ static http_parser_settings settings_pause = ,.on_body = pause_body_cb ,.on_headers_complete = pause_headers_complete_cb ,.on_message_complete = pause_message_complete_cb + ,.on_chunk_begin = pause_chunk_begin_cb + ,.on_chunk_complete = pause_chunk_complete_cb }; static http_parser_settings settings = @@ -1771,6 +1834,8 @@ static http_parser_settings settings = ,.on_body = body_cb ,.on_headers_complete = headers_complete_cb ,.on_message_complete = message_complete_cb + ,.on_chunk_begin = chunk_begin_cb + ,.on_chunk_complete = chunk_complete_cb }; static http_parser_settings settings_count_body = @@ -1781,6 +1846,8 @@ static http_parser_settings settings_count_body = ,.on_body = count_body_cb ,.on_headers_complete = headers_complete_cb ,.on_message_complete = message_complete_cb + ,.on_chunk_begin = chunk_begin_cb + ,.on_chunk_complete = chunk_complete_cb }; static http_parser_settings settings_null = @@ -1791,6 +1858,8 @@ static http_parser_settings settings_null = ,.on_body = 0 ,.on_headers_complete = 0 ,.on_message_complete = 0 + ,.on_chunk_begin = 0 + ,.on_chunk_complete = 0 }; void @@ -1959,6 +2028,8 @@ message_eq (int index, const struct message *expected) } MESSAGE_CHECK_NUM_EQ(expected, m, num_headers); + MESSAGE_CHECK_NUM_EQ(expected, m, num_chunks); + if (!check_num_eq(expected, "num_end_chunks", m->num_chunks, m->num_end_chunks)) return 0; int r; for (i = 0; i < m->num_headers; i++) { @@ -3244,6 +3315,7 @@ main (void) ,.http_minor= 0 ,.status_code= 200 ,.num_headers= 2 + ,.num_chunks= 31337 ,.headers= { { "Transfer-Encoding", "chunked" } , { "Content-Type", "text/plain" }