summarylogtreecommitdiffstats
path: root/0003-close-http-connections-that-prematurely-reset-stream.patch
diff options
context:
space:
mode:
Diffstat (limited to '0003-close-http-connections-that-prematurely-reset-stream.patch')
-rw-r--r--0003-close-http-connections-that-prematurely-reset-stream.patch312
1 files changed, 312 insertions, 0 deletions
diff --git a/0003-close-http-connections-that-prematurely-reset-stream.patch b/0003-close-http-connections-that-prematurely-reset-stream.patch
new file mode 100644
index 000000000000..936730eea7ec
--- /dev/null
+++ b/0003-close-http-connections-that-prematurely-reset-stream.patch
@@ -0,0 +1,312 @@
+From 481a9d54b8f5d73e16c6673eaea1a781c5e5705e Mon Sep 17 00:00:00 2001
+From: Yan Avlasov <yavlasov@google.com>
+Date: Thu, 28 Sep 2023 16:11:58 +0000
+Subject: [PATCH] Close HTTP connections that prematurely reset streams
+
+Signed-off-by: Yan Avlasov <yavlasov@google.com>
+---
+ changelogs/current.yaml | 9 +++
+ source/common/http/conn_manager_config.h | 1 +
+ source/common/http/conn_manager_impl.cc | 65 +++++++++++++++++
+ source/common/http/conn_manager_impl.h | 23 ++++++
+ source/common/runtime/runtime_features.cc | 1 +
+ .../multiplexed_integration_test.cc | 72 +++++++++++++++++++
+ 6 files changed, 171 insertions(+)
+
+diff --git a/changelogs/current.yaml b/changelogs/current.yaml
+index 32fc3bc047fd2..b5d45089f337c 100644
+--- a/changelogs/current.yaml
++++ b/changelogs/current.yaml
+@@ -1,6 +1,15 @@
+ date: July 26, 2023
+
+ behavior_changes:
++- area: http
++ change: |
++ Close HTTP/2 and HTTP/3 connections that prematurely reset streams. The runtime key
++ ``overload.premature_reset_min_stream_lifetime_seconds`` determines the interval where received stream
++ reset is considered premature (with 1 second default). The runtime key ``overload.premature_reset_total_stream_count``,
++ with the default value of 500, determines the number of requests received from a connection before the check for premature
++ resets is applied. The connection is disconnected if more than 50% of resets are premature.
++ Setting the runtime key ``envoy.restart_features.send_goaway_for_premature_rst_streams`` to ``false`` completely disables
++ this check.
+ - area: build
+ change: |
+ Moved the subset, ring_hash, and maglev LB code into extensions. If you use these load balancers and override
+diff --git a/source/common/http/conn_manager_config.h b/source/common/http/conn_manager_config.h
+index 52f8188c205c5..a07eb825f789b 100644
+--- a/source/common/http/conn_manager_config.h
++++ b/source/common/http/conn_manager_config.h
+@@ -66,6 +66,7 @@ namespace Http {
+ COUNTER(downstream_rq_rejected_via_ip_detection) \
+ COUNTER(downstream_rq_response_before_rq_complete) \
+ COUNTER(downstream_rq_rx_reset) \
++ COUNTER(downstream_rq_too_many_premature_resets) \
+ COUNTER(downstream_rq_timeout) \
+ COUNTER(downstream_rq_header_timeout) \
+ COUNTER(downstream_rq_too_large) \
+diff --git a/source/common/http/conn_manager_impl.cc b/source/common/http/conn_manager_impl.cc
+index 5a603aaed80b6..021c55bc4ccae 100644
+--- a/source/common/http/conn_manager_impl.cc
++++ b/source/common/http/conn_manager_impl.cc
+@@ -1,5 +1,6 @@
+ #include "source/common/http/conn_manager_impl.h"
+
++#include <chrono>
+ #include <cstdint>
+ #include <functional>
+ #include <list>
+@@ -55,6 +56,11 @@
+ namespace Envoy {
+ namespace Http {
+
++const absl::string_view ConnectionManagerImpl::PrematureResetTotalStreamCountKey =
++ "overload.premature_reset_total_stream_count";
++const absl::string_view ConnectionManagerImpl::PrematureResetMinStreamLifetimeSecondsKey =
++ "overload.premature_reset_min_stream_lifetime_seconds";
++
+ bool requestWasConnect(const RequestHeaderMapSharedPtr& headers, Protocol protocol) {
+ if (!headers) {
+ return false;
+@@ -267,6 +273,12 @@ void ConnectionManagerImpl::doEndStream(ActiveStream& stream, bool check_for_def
+ }
+
+ void ConnectionManagerImpl::doDeferredStreamDestroy(ActiveStream& stream) {
++ if (!stream.state_.is_internally_destroyed_) {
++ ++closed_non_internally_destroyed_requests_;
++ if (isPrematureRstStream(stream)) {
++ ++number_premature_stream_resets_;
++ }
++ }
+ if (stream.max_stream_duration_timer_ != nullptr) {
+ stream.max_stream_duration_timer_->disableTimer();
+ stream.max_stream_duration_timer_ = nullptr;
+@@ -343,6 +355,7 @@ void ConnectionManagerImpl::doDeferredStreamDestroy(ActiveStream& stream) {
+ if (connection_idle_timer_ && streams_.empty()) {
+ connection_idle_timer_->enableTimer(config_.idleTimeout().value());
+ }
++ maybeDrainDueToPrematureResets();
+ }
+
+ RequestDecoder& ConnectionManagerImpl::newStream(ResponseEncoder& response_encoder,
+@@ -607,6 +620,58 @@ void ConnectionManagerImpl::doConnectionClose(
+ }
+ }
+
++bool ConnectionManagerImpl::isPrematureRstStream(const ActiveStream& stream) const {
++ // Check if the request was prematurely reset, by comparing its lifetime to the configured
++ // threshold.
++ ASSERT(!stream.state_.is_internally_destroyed_);
++ absl::optional<std::chrono::nanoseconds> duration =
++ stream.filter_manager_.streamInfo().currentDuration();
++
++ // Check if request lifetime is longer than the premature reset threshold.
++ if (duration) {
++ const uint64_t lifetime = std::chrono::duration_cast<std::chrono::seconds>(*duration).count();
++ const uint64_t min_lifetime = runtime_.snapshot().getInteger(
++ ConnectionManagerImpl::PrematureResetMinStreamLifetimeSecondsKey, 1);
++ if (lifetime > min_lifetime) {
++ return false;
++ }
++ }
++
++ // If request has completed before configured threshold, also check if the Envoy proxied the
++ // response from the upstream. Requests without the response status were reset.
++ // TODO(RyanTheOptimist): Possibly support half_closed_local instead.
++ return !stream.filter_manager_.streamInfo().responseCode();
++}
++
++// Sends a GOAWAY if too many streams have been reset prematurely on this
++// connection.
++void ConnectionManagerImpl::maybeDrainDueToPrematureResets() {
++ if (!Runtime::runtimeFeatureEnabled(
++ "envoy.restart_features.send_goaway_for_premature_rst_streams") ||
++ closed_non_internally_destroyed_requests_ == 0) {
++ return;
++ }
++
++ const uint64_t limit =
++ runtime_.snapshot().getInteger(ConnectionManagerImpl::PrematureResetTotalStreamCountKey, 500);
++
++ if (closed_non_internally_destroyed_requests_ < limit) {
++ return;
++ }
++
++ if (static_cast<double>(number_premature_stream_resets_) /
++ closed_non_internally_destroyed_requests_ <
++ .5) {
++ return;
++ }
++
++ if (drain_state_ == DrainState::NotDraining) {
++ stats_.named_.downstream_rq_too_many_premature_resets_.inc();
++ doConnectionClose(Network::ConnectionCloseType::Abort, absl::nullopt,
++ "too_many_premature_resets");
++ }
++}
++
+ void ConnectionManagerImpl::onGoAway(GoAwayErrorCode) {
+ // Currently we do nothing with remote go away frames. In the future we can decide to no longer
+ // push resources if applicable.
+diff --git a/source/common/http/conn_manager_impl.h b/source/common/http/conn_manager_impl.h
+index b82b1967a5115..dad494c953bc0 100644
+--- a/source/common/http/conn_manager_impl.h
++++ b/source/common/http/conn_manager_impl.h
+@@ -115,6 +115,14 @@ class ConnectionManagerImpl : Logger::Loggable<Logger::Id::http>,
+ void setClearHopByHopResponseHeaders(bool value) { clear_hop_by_hop_response_headers_ = value; }
+ bool clearHopByHopResponseHeaders() const { return clear_hop_by_hop_response_headers_; }
+
++ // This runtime key configures the number of streams which must be closed on a connection before
++ // envoy will potentially drain a connection due to excessive prematurely reset streams.
++ static const absl::string_view PrematureResetTotalStreamCountKey;
++
++ // The minimum lifetime of a stream, in seconds, in order not to be considered
++ // prematurely closed.
++ static const absl::string_view PrematureResetMinStreamLifetimeSecondsKey;
++
+ private:
+ struct ActiveStream;
+ class MobileConnectionManagerImpl;
+@@ -544,6 +552,15 @@ class ConnectionManagerImpl : Logger::Loggable<Logger::Id::http>,
+ void doConnectionClose(absl::optional<Network::ConnectionCloseType> close_type,
+ absl::optional<StreamInfo::ResponseFlag> response_flag,
+ absl::string_view details);
++ // Returns true if a RST_STREAM for the given stream is premature. Premature
++ // means the RST_STREAM arrived before response headers were sent and than
++ // the stream was alive for short period of time. This period is specified
++ // by the optional runtime value PrematureResetMinStreamLifetimeSecondsKey,
++ // or one second if that is not present.
++ bool isPrematureRstStream(const ActiveStream& stream) const;
++ // Sends a GOAWAY if both sufficient streams have been closed on a connection
++ // and at least half have been prematurely reset?
++ void maybeDrainDueToPrematureResets();
+
+ enum class DrainState { NotDraining, Draining, Closing };
+
+@@ -584,6 +601,12 @@ class ConnectionManagerImpl : Logger::Loggable<Logger::Id::http>,
+ bool clear_hop_by_hop_response_headers_{true};
+ // The number of requests accumulated on the current connection.
+ uint64_t accumulated_requests_{};
++ // The number of requests closed on the current connection which were
++ // not internally destroyed
++ uint64_t closed_non_internally_destroyed_requests_{};
++ // The number of requests that received a premature RST_STREAM, according to
++ // the definition given in `isPrematureRstStream()`.
++ uint64_t number_premature_stream_resets_{0};
+ const std::string proxy_name_; // for Proxy-Status.
+
+ const bool refresh_rtt_after_request_{};
+diff --git a/source/common/runtime/runtime_features.cc b/source/common/runtime/runtime_features.cc
+index 8b75d6bbbfb1c..60d594c964319 100644
+--- a/source/common/runtime/runtime_features.cc
++++ b/source/common/runtime/runtime_features.cc
+@@ -91,6 +91,7 @@ RUNTIME_GUARD(envoy_reloadable_features_validate_grpc_header_before_log_grpc_sta
+ RUNTIME_GUARD(envoy_reloadable_features_validate_upstream_headers);
+ RUNTIME_GUARD(envoy_restart_features_explicit_wildcard_resource);
+ RUNTIME_GUARD(envoy_restart_features_remove_runtime_singleton);
++RUNTIME_GUARD(envoy_restart_features_send_goaway_for_premature_rst_streams);
+ RUNTIME_GUARD(envoy_restart_features_udp_read_normalize_addresses);
+ RUNTIME_GUARD(envoy_restart_features_use_apple_api_for_dns_lookups);
+
+diff --git a/test/integration/multiplexed_integration_test.cc b/test/integration/multiplexed_integration_test.cc
+index 190e57065d1c6..b4260ff3fa9ec 100644
+--- a/test/integration/multiplexed_integration_test.cc
++++ b/test/integration/multiplexed_integration_test.cc
+@@ -1,4 +1,5 @@
+ #include <algorithm>
++#include <chrono>
+ #include <memory>
+ #include <string>
+
+@@ -25,6 +26,7 @@
+ #include "test/mocks/http/mocks.h"
+ #include "test/test_common/network_utility.h"
+ #include "test/test_common/printers.h"
++#include "test/test_common/simulated_time_system.h"
+ #include "test/test_common/utility.h"
+
+ #include "gtest/gtest.h"
+@@ -92,6 +94,15 @@ INSTANTIATE_TEST_SUITE_P(IpVersions, MultiplexedIntegrationTest,
+ {Http::CodecType::HTTP1})),
+ HttpProtocolIntegrationTest::protocolTestParamsToString);
+
++class MultiplexedIntegrationTestWithSimulatedTime : public Event::TestUsingSimulatedTime,
++ public MultiplexedIntegrationTest {};
++
++INSTANTIATE_TEST_SUITE_P(IpVersions, MultiplexedIntegrationTestWithSimulatedTime,
++ testing::ValuesIn(HttpProtocolIntegrationTest::getProtocolTestParams(
++ {Http::CodecType::HTTP2, Http::CodecType::HTTP3},
++ {Http::CodecType::HTTP1})),
++ HttpProtocolIntegrationTest::protocolTestParamsToString);
++
+ TEST_P(MultiplexedIntegrationTest, RouterRequestAndResponseWithBodyNoBuffer) {
+ testRouterRequestAndResponseWithBody(1024, 512, false, false);
+ }
+@@ -1076,6 +1087,67 @@ TEST_P(MultiplexedIntegrationTest, GoAway) {
+ EXPECT_EQ("200", response->headers().getStatusValue());
+ }
+
++// TODO(rch): Add a unit test which covers internal redirect handling.
++TEST_P(MultiplexedIntegrationTestWithSimulatedTime, GoAwayAfterTooManyResets) {
++ EXCLUDE_DOWNSTREAM_HTTP3; // Need to wait for the server to reset the stream
++ // before opening new one.
++ config_helper_.addRuntimeOverride("envoy.restart_features.send_goaway_for_premature_rst_streams",
++ "true");
++ const int total_streams = 100;
++ config_helper_.addRuntimeOverride("overload.premature_reset_total_stream_count",
++ absl::StrCat(total_streams));
++ initialize();
++
++ Http::TestRequestHeaderMapImpl headers{
++ {":method", "GET"}, {":path", "/healthcheck"}, {":scheme", "http"}, {":authority", "host"}};
++ codec_client_ = makeHttpConnection(lookupPort("http"));
++ for (int i = 0; i < total_streams; ++i) {
++ auto encoder_decoder = codec_client_->startRequest(headers);
++ request_encoder_ = &encoder_decoder.first;
++ auto response = std::move(encoder_decoder.second);
++ codec_client_->sendReset(*request_encoder_);
++ ASSERT_TRUE(response->waitForReset());
++ }
++
++ // Envoy should disconnect client due to premature reset check
++ ASSERT_TRUE(codec_client_->waitForDisconnect());
++ test_server_->waitForCounterEq("http.config_test.downstream_rq_rx_reset", total_streams);
++ test_server_->waitForCounterEq("http.config_test.downstream_rq_too_many_premature_resets", 1);
++}
++
++TEST_P(MultiplexedIntegrationTestWithSimulatedTime, DontGoAwayAfterTooManyResetsForLongStreams) {
++ EXCLUDE_DOWNSTREAM_HTTP3; // Need to wait for the server to reset the stream
++ // before opening new one.
++ config_helper_.addRuntimeOverride("envoy.restart_features.send_goaway_for_premature_rst_streams",
++ "true");
++ const int total_streams = 100;
++ const int stream_lifetime_seconds = 2;
++ config_helper_.addRuntimeOverride("overload.premature_reset_total_stream_count",
++ absl::StrCat(total_streams));
++
++ config_helper_.addRuntimeOverride("overload.premature_reset_min_stream_lifetime_seconds",
++ absl::StrCat(stream_lifetime_seconds));
++
++ initialize();
++
++ Http::TestRequestHeaderMapImpl headers{
++ {":method", "GET"}, {":path", "/healthcheck"}, {":scheme", "http"}, {":authority", "host"}};
++ codec_client_ = makeHttpConnection(lookupPort("http"));
++
++ std::string request_counter = "http.config_test.downstream_rq_total";
++ std::string reset_counter = "http.config_test.downstream_rq_rx_reset";
++ for (int i = 0; i < total_streams * 2; ++i) {
++ auto encoder_decoder = codec_client_->startRequest(headers);
++ request_encoder_ = &encoder_decoder.first;
++ auto response = std::move(encoder_decoder.second);
++ test_server_->waitForCounterEq(request_counter, i + 1);
++ timeSystem().advanceTimeWait(std::chrono::seconds(2 * stream_lifetime_seconds));
++ codec_client_->sendReset(*request_encoder_);
++ ASSERT_TRUE(response->waitForReset());
++ test_server_->waitForCounterEq(reset_counter, i + 1);
++ }
++}
++
+ TEST_P(MultiplexedIntegrationTest, Trailers) { testTrailers(1024, 2048, false, false); }
+
+ TEST_P(MultiplexedIntegrationTest, TrailersGiantBody) {