diff --git a/boost/network/protocol/http/client/async_impl.hpp b/boost/network/protocol/http/client/async_impl.hpp index 31578c2eb..0c0bc80ce 100644 --- a/boost/network/protocol/http/client/async_impl.hpp +++ b/boost/network/protocol/http/client/async_impl.hpp @@ -41,7 +41,7 @@ struct async_client typedef std::function body_generator_function_type; async_client(bool cache_resolved, bool follow_redirect, - bool always_verify_peer, int timeout, + bool always_verify_peer, int timeout, bool remove_chunk_markers, std::shared_ptr service, optional certificate_filename, optional verify_path, @@ -49,7 +49,8 @@ struct async_client optional private_key_file, optional ciphers, optional sni_hostname, long ssl_options) - : connection_base(cache_resolved, follow_redirect, timeout), + : connection_base(cache_resolved, follow_redirect, timeout, + remove_chunk_markers), service_ptr(service.get() ? service : std::make_shared()), service_(*service_ptr), diff --git a/boost/network/protocol/http/client/connection/async_base.hpp b/boost/network/protocol/http/client/connection/async_base.hpp index a31ad30fd..b1c1aa67a 100644 --- a/boost/network/protocol/http/client/connection/async_base.hpp +++ b/boost/network/protocol/http/client/connection/async_base.hpp @@ -44,6 +44,7 @@ struct async_connection_base { static connection_ptr new_connection( resolve_function resolve, resolver_type &resolver, bool follow_redirect, bool always_verify_peer, bool https, int timeout, + bool remove_chunk_markers, optional certificate_filename = optional(), optional const &verify_path = optional(), optional certificate_file = optional(), @@ -59,7 +60,8 @@ struct async_connection_base { certificate_filename, verify_path, certificate_file, private_key_file, ciphers, sni_hostname, ssl_options); auto temp = std::make_shared( - resolver, resolve, follow_redirect, timeout, std::move(delegate)); + resolver, resolve, follow_redirect, timeout, remove_chunk_markers, + std::move(delegate)); BOOST_ASSERT(temp != nullptr); return temp; } diff --git a/boost/network/protocol/http/client/connection/async_normal.hpp b/boost/network/protocol/http/client/connection/async_normal.hpp index 00ec41a37..97af80fe2 100644 --- a/boost/network/protocol/http/client/connection/async_normal.hpp +++ b/boost/network/protocol/http/client/connection/async_normal.hpp @@ -30,6 +30,7 @@ #include #include #include +#include #include namespace boost { @@ -37,6 +38,82 @@ namespace network { namespace http { namespace impl { +template +struct chunk_encoding_parser { + chunk_encoding_parser() : state(state_t::header), chunk_size(0) {} + + enum class state_t { header, header_end, data, data_end }; + + state_t state; + size_t chunk_size; + std::array::type, 1024> buffer; + + void update_chunk_size( + boost::iterator_range::type, 1024>::const_iterator> const &range) { + if (range.empty()) return; + std::stringstream ss; + ss << std::hex << range; + size_t size; + ss >> size; + // New digits are appended as LSBs + chunk_size = (chunk_size << (range.size() * 4)) | size; + } + + boost::iterator_range< + typename std::array::type, 1024>::const_iterator> + operator()( + boost::iterator_range::type, 1024>::const_iterator> const &range) { + auto iter = boost::begin(range); + auto begin = iter; + auto pos = boost::begin(buffer); + + while (iter != boost::end(range)) switch (state) { + case state_t::header: + iter = std::find(iter, boost::end(range), '\r'); + update_chunk_size(boost::make_iterator_range(begin, iter)); + if (iter != boost::end(range)) { + state = state_t::header_end; + ++iter; + } + break; + + case state_t::header_end: + BOOST_ASSERT(*iter == '\n'); + ++iter; + state = state_t::data; + break; + + case state_t::data: + if (chunk_size == 0) { + BOOST_ASSERT(*iter == '\r'); + ++iter; + state = state_t::data_end; + } else { + auto len = std::min(chunk_size, + (size_t)std::distance(iter, boost::end(range))); + begin = iter; + iter = std::next(iter, len); + pos = std::copy(begin, iter, pos); + chunk_size -= len; + } + break; + + case state_t::data_end: + BOOST_ASSERT(*iter == '\n'); + ++iter; + begin = iter; + state = state_t::header; + break; + + default: + BOOST_ASSERT(false && "Bug, report this to the developers!"); + } + return boost::make_iterator_range(boost::begin(buffer), pos); + } +}; + template struct async_connection_base; @@ -73,8 +150,10 @@ struct http_async_connection http_async_connection(resolver_type& resolver, resolve_function resolve, bool follow_redirect, int timeout, + bool remove_chunk_markers, connection_delegate_ptr delegate) : timeout_(timeout), + remove_chunk_markers_(remove_chunk_markers), timer_(resolver.get_io_service()), is_timedout_(false), follow_redirect_(follow_redirect), @@ -354,8 +433,11 @@ struct http_async_connection // The invocation of the callback is synchronous to allow us to // wait before scheduling another read. - callback(make_iterator_range(begin, end), ec); - + if (this->is_chunk_encoding && remove_chunk_markers_) { + callback(parse_chunk_encoding(make_iterator_range(begin, end)), ec); + } else { + callback(make_iterator_range(begin, end), ec); + } auto self = this->shared_from_this(); delegate_->read_some( boost::asio::mutable_buffers_1(this->part.data(), @@ -394,16 +476,33 @@ struct http_async_connection // We call the callback function synchronously passing the error // condition (in this case, end of file) so that it can handle it // appropriately. - callback(make_iterator_range(begin, end), ec); + if (this->is_chunk_encoding && remove_chunk_markers_) { + callback(parse_chunk_encoding(make_iterator_range(begin, end)), ec); + } else { + callback(make_iterator_range(begin, end), ec); + } } else { string_type body_string; - std::swap(body_string, this->partial_parsed); - auto it = this->part.begin(); - std::advance(it, bytes_transferred); - body_string.append(this->part.begin(), it); - if (this->is_chunk_encoding) { - this->body_promise.set_value(parse_chunk_encoding(body_string)); + if (this->is_chunk_encoding && remove_chunk_markers_) { + for (size_t i = 0; i < this->partial_parsed.size(); i += 1024) { + auto range = parse_chunk_encoding(boost::make_iterator_range( + static_cast::type, 1024>::const_iterator>( + this->partial_parsed.data()) + i, + static_cast::type, 1024>::const_iterator>( + this->partial_parsed.data()) + + std::min(i + 1024, this->partial_parsed.size()))); + body_string.append(boost::begin(range), boost::end(range)); + } + this->partial_parsed.clear(); + auto range = parse_chunk_encoding(boost::make_iterator_range( + this->part.begin(), + this->part.begin() + bytes_transferred)); + body_string.append(boost::begin(range), boost::end(range)); + this->body_promise.set_value(body_string); } else { + std::swap(body_string, this->partial_parsed); + body_string.append(this->part.begin(), + this->part.begin() + bytes_transferred); this->body_promise.set_value(body_string); } } @@ -425,7 +524,11 @@ struct http_async_connection this->part.begin(); typename protocol_base::buffer_type::const_iterator end = begin; std::advance(end, bytes_transferred); - callback(make_iterator_range(begin, end), ec); + if (this->is_chunk_encoding && remove_chunk_markers_) { + callback(parse_chunk_encoding(make_iterator_range(begin, end)), ec); + } else { + callback(make_iterator_range(begin, end), ec); + } auto self = this->shared_from_this(); delegate_->read_some( boost::asio::mutable_buffers_1(this->part.data(), @@ -488,38 +591,8 @@ struct http_async_connection } } - string_type parse_chunk_encoding(string_type& body_string) { - string_type body; - string_type crlf = "\r\n"; - - typename string_type::iterator begin = body_string.begin(); - for (typename string_type::iterator iter = - std::search(begin, body_string.end(), crlf.begin(), crlf.end()); - iter != body_string.end(); - iter = - std::search(begin, body_string.end(), crlf.begin(), crlf.end())) { - string_type line(begin, iter); - if (line.empty()) { - break; - } - std::stringstream stream(line); - int len; - stream >> std::hex >> len; - std::advance(iter, 2); - if (len == 0) { - break; - } - if (len <= body_string.end() - iter) { - body.insert(body.end(), iter, iter + len); - std::advance(iter, len + 2); - } - begin = iter; - } - - return body; - } - int timeout_; + bool remove_chunk_markers_; boost::asio::steady_timer timer_; bool is_timedout_; bool follow_redirect_; @@ -529,6 +602,7 @@ struct http_async_connection connection_delegate_ptr delegate_; boost::asio::streambuf command_streambuf; string_type method; + chunk_encoding_parser parse_chunk_encoding; }; } // namespace impl diff --git a/boost/network/protocol/http/client/facade.hpp b/boost/network/protocol/http/client/facade.hpp index 0d175acd9..7ec8e2f0a 100644 --- a/boost/network/protocol/http/client/facade.hpp +++ b/boost/network/protocol/http/client/facade.hpp @@ -312,7 +312,8 @@ class basic_client_facade { options.openssl_verify_path(), options.openssl_certificate_file(), options.openssl_private_key_file(), options.openssl_ciphers(), options.openssl_sni_hostname(), options.openssl_options(), - options.io_service(), options.timeout())); + options.io_service(), options.timeout(), + options.remove_chunk_markers())); } }; diff --git a/boost/network/protocol/http/client/options.hpp b/boost/network/protocol/http/client/options.hpp index 73d685577..afc265abd 100644 --- a/boost/network/protocol/http/client/options.hpp +++ b/boost/network/protocol/http/client/options.hpp @@ -34,7 +34,8 @@ class client_options { openssl_options_(0), io_service_(), always_verify_peer_(true), - timeout_(0) {} + timeout_(0), + remove_chunk_markers_(true) {} client_options(client_options const& other) : cache_resolved_(other.cache_resolved_), @@ -48,7 +49,8 @@ class client_options { openssl_options_(other.openssl_options_), io_service_(other.io_service_), always_verify_peer_(other.always_verify_peer_), - timeout_(other.timeout_) {} + timeout_(other.timeout_), + remove_chunk_markers_(other.remove_chunk_markers_) {} client_options& operator=(client_options other) { other.swap(*this); @@ -69,6 +71,7 @@ class client_options { swap(io_service_, other.io_service_); swap(always_verify_peer_, other.always_verify_peer_); swap(timeout_, other.timeout_); + swap(remove_chunk_markers_, other.remove_chunk_markers_); } /// Specify whether the client should cache resolved endpoints. @@ -154,6 +157,12 @@ class client_options { return *this; } + /// Set whether we process chunked-encoded streams. + client_options& remove_chunk_markers(bool v) { + remove_chunk_markers_ = v; + return *this; + } + bool cache_resolved() const { return cache_resolved_; } bool follow_redirects() const { return follow_redirects_; } @@ -190,6 +199,8 @@ class client_options { int timeout() const { return timeout_; } + bool remove_chunk_markers() const { return remove_chunk_markers_; } + private: bool cache_resolved_; bool follow_redirects_; @@ -203,6 +214,7 @@ class client_options { std::shared_ptr io_service_; bool always_verify_peer_; int timeout_; + bool remove_chunk_markers_; }; template diff --git a/boost/network/protocol/http/client/pimpl.hpp b/boost/network/protocol/http/client/pimpl.hpp index d62be32ce..01c77ca70 100644 --- a/boost/network/protocol/http/client/pimpl.hpp +++ b/boost/network/protocol/http/client/pimpl.hpp @@ -74,10 +74,12 @@ struct basic_client_impl optional const& private_key_file, optional const& ciphers, optional const& sni_hostname, long ssl_options, - std::shared_ptr service, int timeout) - : base_type(cache_resolved, follow_redirect, always_verify_peer, timeout, - service, certificate_filename, verify_path, certificate_file, - private_key_file, ciphers, sni_hostname, ssl_options) {} + std::shared_ptr service, int timeout, + bool remove_chunk_markers) + : base_type(cache_resolved, follow_redirect, always_verify_peer, timeout, + remove_chunk_markers, service, certificate_filename, verify_path, + certificate_file, private_key_file, ciphers, sni_hostname, + ssl_options) {} ~basic_client_impl() = default; }; diff --git a/boost/network/protocol/http/policies/async_connection.hpp b/boost/network/protocol/http/policies/async_connection.hpp index 990103ce5..64a6b287e 100644 --- a/boost/network/protocol/http/policies/async_connection.hpp +++ b/boost/network/protocol/http/policies/async_connection.hpp @@ -40,7 +40,7 @@ struct async_connection_policy : resolver_policy::type { struct connection_impl { connection_impl( bool follow_redirect, bool always_verify_peer, resolve_function resolve, - resolver_type& resolver, bool https, int timeout, + resolver_type& resolver, bool https, int timeout, bool remove_chunk_markers, optional /*unused*/ const& certificate_filename, optional const& verify_path, optional const& certificate_file, @@ -49,9 +49,9 @@ struct async_connection_policy : resolver_policy::type { optional const& sni_hostname, long ssl_options) { pimpl = impl::async_connection_base:: new_connection(resolve, resolver, follow_redirect, always_verify_peer, - https, timeout, certificate_filename, verify_path, - certificate_file, private_key_file, ciphers, - sni_hostname, ssl_options); + https, timeout, remove_chunk_markers, + certificate_filename, verify_path, certificate_file, + private_key_file, ciphers, sni_hostname, ssl_options); } basic_response send_request(string_type /*unused*/ const& method, @@ -88,20 +88,22 @@ struct async_connection_policy : resolver_policy::type { this->resolve(resolver, host, port, once_resolved); }, resolver, boost::iequals(protocol_, string_type("https")), timeout_, - certificate_filename, verify_path, certificate_file, private_key_file, - ciphers, sni_hostname, ssl_options); + remove_chunk_markers_, certificate_filename, verify_path, + certificate_file, private_key_file, ciphers, sni_hostname, ssl_options); } void cleanup() {} async_connection_policy(bool cache_resolved, bool follow_redirect, - int timeout) + int timeout, bool remove_chunk_markers) : resolver_base(cache_resolved), follow_redirect_(follow_redirect), - timeout_(timeout) {} + timeout_(timeout), + remove_chunk_markers_(remove_chunk_markers) {} bool follow_redirect_; int timeout_; + bool remove_chunk_markers_; }; } // namespace http diff --git a/libs/network/test/http/client_get_different_port_test.cpp b/libs/network/test/http/client_get_different_port_test.cpp index 78e7e7d35..1b9d24367 100644 --- a/libs/network/test/http/client_get_different_port_test.cpp +++ b/libs/network/test/http/client_get_different_port_test.cpp @@ -15,9 +15,10 @@ namespace http = boost::network::http; TYPED_TEST_CASE(HTTPClientTest, ClientTypes); TYPED_TEST(HTTPClientTest, GetDifferentPort) { - TypeParam client; - typename TypeParam::request r("http://www.boost.org:80/"); - auto response_ = client.get(r); + using client = TypeParam; + client client_; + typename TypeParam::request request("http://www.boost.org:80/"); + auto response_ = client_.get(request); auto range = headers(response_)["Content-Type"]; EXPECT_TRUE(std::begin(range) != std::end(range)); EXPECT_NE(0, body(response_).size()); diff --git a/libs/network/test/http/client_get_streaming_test.cpp b/libs/network/test/http/client_get_streaming_test.cpp index 0de687c82..fed26f772 100644 --- a/libs/network/test/http/client_get_streaming_test.cpp +++ b/libs/network/test/http/client_get_streaming_test.cpp @@ -32,7 +32,8 @@ TYPED_TEST(HTTPClientTest, GetStreamingTest) { typename TypeParam::string_type dummy_body; body_handler handler_instance(body_string); { - TypeParam client_; + using client = TypeParam; + client client_; ASSERT_NO_THROW(response = client_.get(request, handler_instance)); auto range = headers(response)["Content-Type"]; ASSERT_TRUE(!boost::empty(range));