Skip to content

Allow immediate request failure on connection error #625

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ final class HTTPConnectionPool {

self._state = StateMachine(
idGenerator: idGenerator,
maximumConcurrentHTTP1Connections: clientConfiguration.connectionPool.concurrentHTTP1ConnectionsPerHostSoftLimit
maximumConcurrentHTTP1Connections: clientConfiguration.connectionPool.concurrentHTTP1ConnectionsPerHostSoftLimit,
retryConnectionEstablishment: clientConfiguration.connectionPool.retryConnectionEstablishment
)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import NIOCore
extension HTTPConnectionPool {
struct HTTP1StateMachine {
typealias Action = HTTPConnectionPool.StateMachine.Action
typealias RequestAction = HTTPConnectionPool.StateMachine.RequestAction
typealias ConnectionMigrationAction = HTTPConnectionPool.StateMachine.ConnectionMigrationAction
typealias EstablishedAction = HTTPConnectionPool.StateMachine.EstablishedAction
typealias EstablishedConnectionAction = HTTPConnectionPool.StateMachine.EstablishedConnectionAction
Expand All @@ -29,16 +30,21 @@ extension HTTPConnectionPool {

private(set) var requests: RequestQueue
private(set) var lifecycleState: StateMachine.LifecycleState
/// The property was introduced to fail fast during testing.
/// Otherwise this should always be true and not turned off.
private let retryConnectionEstablishment: Bool

init(
idGenerator: Connection.ID.Generator,
maximumConcurrentConnections: Int,
retryConnectionEstablishment: Bool,
lifecycleState: StateMachine.LifecycleState
) {
self.connections = HTTP1Connections(
maximumConcurrentConnections: maximumConcurrentConnections,
generator: idGenerator
)
self.retryConnectionEstablishment = retryConnectionEstablishment

self.requests = RequestQueue()
self.lifecycleState = lifecycleState
Expand Down Expand Up @@ -219,6 +225,17 @@ extension HTTPConnectionPool {

switch self.lifecycleState {
case .running:
guard self.retryConnectionEstablishment else {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of using the guard here it might be nicer to go with where clause in the switch...

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A where constraint only applies to the last case if multiple cases are defined e.g.

case .running, .shuttingDown where self.retryConnectionEstablishment == true:

the where constraint only applies to `.shuttingDown` but not `.running`. We had an issue in AHC which we debugged together and decided we should stop using where clauses in switch cases altogether because it is not obvious.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

true, however here we only have one case here. Anyway, not a hill I want to die on. If we want to stick with an inline if/guard, I would prefer this to be an if self.retryConnectionEstablishment {} and then move the retry logic into the if.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the benefit of the if? A guard at least guarantees that we return; an if can fall through, which we don't want.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I consider it nicer to read. But that's personal...

guard let (index, _) = self.connections.failConnection(connectionID) else {
preconditionFailure("A connection attempt failed, that the state machine knows nothing about. Somewhere state was lost.")
}
self.connections.removeConnection(at: index)

return .init(
request: self.failAllRequests(reason: error),
connection: .none
)
}
// We don't care how many waiting requests we have at this point, we will schedule a
// retry. More tasks, may appear until the backoff has completed. The final
// decision about the retry will be made in `connectionCreationBackoffDone(_:)`
Expand Down Expand Up @@ -523,6 +540,14 @@ extension HTTPConnectionPool {
return .none
}

/// Drains the request queue and produces the action that fails every pending request.
///
/// - Parameter error: The error to deliver to each dequeued request.
/// - Returns: `.failRequestsAndCancelTimeouts` carrying all drained requests, or
///   `.none` if the queue was already empty.
private mutating func failAllRequests(reason error: Error) -> RequestAction {
    let pending = self.requests.removeAll()
    if pending.isEmpty {
        return .none
    }
    return .failRequestsAndCancelTimeouts(pending, error)
}

// MARK: HTTP2

mutating func newHTTP2MaxConcurrentStreamsReceived(_ connectionID: Connection.ID, newMaxStreams: Int) -> Action {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import NIOHTTP2
extension HTTPConnectionPool {
struct HTTP2StateMachine {
typealias Action = HTTPConnectionPool.StateMachine.Action
typealias RequestAction = HTTPConnectionPool.StateMachine.RequestAction
typealias ConnectionMigrationAction = HTTPConnectionPool.StateMachine.ConnectionMigrationAction
typealias EstablishedAction = HTTPConnectionPool.StateMachine.EstablishedAction
typealias EstablishedConnectionAction = HTTPConnectionPool.StateMachine.EstablishedConnectionAction
Expand All @@ -33,16 +34,21 @@ extension HTTPConnectionPool {
private let idGenerator: Connection.ID.Generator

private(set) var lifecycleState: StateMachine.LifecycleState
/// The property was introduced to fail fast during testing.
/// Otherwise this should always be true and not turned off.
private let retryConnectionEstablishment: Bool

/// Creates an HTTP/2 pool state machine in the given lifecycle state, starting
/// with no established connections and an empty request queue.
///
/// - Parameters:
///   - idGenerator: Generator used to mint IDs for newly created connections.
///   - retryConnectionEstablishment: Whether failed connection attempts are retried
///     with backoff; only disabled to fail fast during testing.
///   - lifecycleState: The lifecycle state the pool starts in (e.g. when migrating
///     from an HTTP/1.1 pool that is already shutting down).
init(
    idGenerator: Connection.ID.Generator,
    retryConnectionEstablishment: Bool,
    lifecycleState: StateMachine.LifecycleState
) {
    self.idGenerator = idGenerator
    self.retryConnectionEstablishment = retryConnectionEstablishment
    self.lifecycleState = lifecycleState
    self.requests = RequestQueue()
    self.connections = HTTP2Connections(generator: idGenerator)
}

mutating func migrateFromHTTP1(
Expand Down Expand Up @@ -398,16 +404,30 @@ extension HTTPConnectionPool {
}

mutating func failedToCreateNewConnection(_ error: Error, connectionID: Connection.ID) -> Action {
// TODO: switch over state https://github.com/swift-server/async-http-client/issues/638
self.failedConsecutiveConnectionAttempts += 1
self.lastConnectFailure = error

guard self.retryConnectionEstablishment else {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why don't we check the current running state here? Like in the http1 case? This seems weird. I think we should add this here. If the pool is closing, we should not backoff anymore.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah agree this is weird but has nothing to do with this change as we don't check the state at all. We should tackle this in a separate PR as we likely want to add test.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

guard let (index, _) = self.connections.failConnection(connectionID) else {
preconditionFailure("A connection attempt failed, that the state machine knows nothing about. Somewhere state was lost.")
}
self.connections.removeConnection(at: index)

return .init(
request: self.failAllRequests(reason: error),
connection: .none
)
}

let eventLoop = self.connections.backoffNextConnectionAttempt(connectionID)
let backoff = calculateBackoff(failedAttempt: self.failedConsecutiveConnectionAttempts)
return .init(request: .none, connection: .scheduleBackoffTimer(connectionID, backoff: backoff, on: eventLoop))
}

/// Records a transient connectivity failure while a connection attempt keeps waiting.
///
/// Only the most recent connect failure is remembered; no request is failed and no
/// connection action is taken.
mutating func waitingForConnectivity(_ error: Error, connectionID: Connection.ID) -> Action {
    self.lastConnectFailure = error
    return Action(request: .none, connection: .none)
}

Expand All @@ -421,6 +441,14 @@ extension HTTPConnectionPool {
return self.nextActionForFailedConnection(at: index, on: context.eventLoop)
}

/// Empties the request queue, returning the action that fails all drained requests
/// with the supplied error, or `.none` when nothing was queued.
private mutating func failAllRequests(reason error: Error) -> RequestAction {
    let drained = self.requests.removeAll()
    return drained.isEmpty ? .none : .failRequestsAndCancelTimeouts(drained, error)
}

mutating func timeoutRequest(_ requestID: Request.ID) -> Action {
// 1. check requests in queue
if let request = self.requests.remove(requestID) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,13 +96,22 @@ extension HTTPConnectionPool {

let idGenerator: Connection.ID.Generator
let maximumConcurrentHTTP1Connections: Int

init(idGenerator: Connection.ID.Generator, maximumConcurrentHTTP1Connections: Int) {
/// The property was introduced to fail fast during testing.
/// Otherwise this should always be true and not turned off.
private let retryConnectionEstablishment: Bool

init(
idGenerator: Connection.ID.Generator,
maximumConcurrentHTTP1Connections: Int,
retryConnectionEstablishment: Bool
) {
self.maximumConcurrentHTTP1Connections = maximumConcurrentHTTP1Connections
self.retryConnectionEstablishment = retryConnectionEstablishment
self.idGenerator = idGenerator
let http1State = HTTP1StateMachine(
idGenerator: idGenerator,
maximumConcurrentConnections: maximumConcurrentHTTP1Connections,
retryConnectionEstablishment: retryConnectionEstablishment,
lifecycleState: .running
)
self.state = .http1(http1State)
Expand All @@ -127,6 +136,7 @@ extension HTTPConnectionPool {
var http1StateMachine = HTTP1StateMachine(
idGenerator: self.idGenerator,
maximumConcurrentConnections: self.maximumConcurrentHTTP1Connections,
retryConnectionEstablishment: self.retryConnectionEstablishment,
lifecycleState: http2StateMachine.lifecycleState
)

Expand All @@ -147,6 +157,7 @@ extension HTTPConnectionPool {

var http2StateMachine = HTTP2StateMachine(
idGenerator: self.idGenerator,
retryConnectionEstablishment: self.retryConnectionEstablishment,
lifecycleState: http1StateMachine.lifecycleState
)
let migrationAction = http2StateMachine.migrateFromHTTP1(
Expand Down
10 changes: 10 additions & 0 deletions Sources/AsyncHTTPClient/HTTPClient.swift
Original file line number Diff line number Diff line change
Expand Up @@ -975,13 +975,23 @@ extension HTTPClient.Configuration {
/// an explicit eventLoopRequirement are sent, this number might be exceeded due to overflow connections.
public var concurrentHTTP1ConnectionsPerHostSoftLimit: Int

/// If `true`, ``HTTPClient`` will try to create new connections on connection failure with an exponential backoff.
/// Requests will only fail after the ``HTTPClient/Configuration/Timeout-swift.struct/connect`` timeout exceeded.
/// If `false`, all requests that have no assigned connection will fail immediately after a connection could not be established.
/// Defaults to `true`.
/// - warning: We highly recommend leaving this on.
///   It is very common that connection establishment is flaky at scale.
///   ``HTTPClient`` will automatically mitigate these kinds of issues if this flag is turned on.
var retryConnectionEstablishment: Bool

/// Creates a connection pool configuration with the given idle timeout and the
/// default soft limit of 8 concurrent HTTP/1.1 connections per host.
public init(idleTimeout: TimeAmount = .seconds(60)) {
self.init(idleTimeout: idleTimeout, concurrentHTTP1ConnectionsPerHostSoftLimit: 8)
}

/// Creates a connection pool configuration.
/// `retryConnectionEstablishment` always starts out as `true`; it is not part of
/// the public initializer and can only be changed on the stored property afterwards.
public init(idleTimeout: TimeAmount, concurrentHTTP1ConnectionsPerHostSoftLimit: Int) {
self.idleTimeout = idleTimeout
self.concurrentHTTP1ConnectionsPerHostSoftLimit = concurrentHTTP1ConnectionsPerHostSoftLimit
self.retryConnectionEstablishment = true
}
}

Expand Down
2 changes: 1 addition & 1 deletion Tests/AsyncHTTPClientTests/AsyncAwaitEndToEndTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,7 @@ final class AsyncAwaitEndToEndTests: XCTestCase {
return XCTFail("unexpected error \(error)")
}
// a race between deadline and connect timer can result in either error
XCTAssertTrue([.deadlineExceeded, .connectTimeout].contains(error))
XCTAssertTrue([.deadlineExceeded, .connectTimeout].contains(error), "unexpected error \(error)")
}
}
#endif
Expand Down
3 changes: 3 additions & 0 deletions Tests/AsyncHTTPClientTests/HTTPClientNIOTSTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ class HTTPClientNIOTSTests: XCTestCase {
let httpBin = HTTPBin(.http1_1(ssl: true))
var config = HTTPClient.Configuration()
config.networkFrameworkWaitForConnectivity = false
config.connectionPool.retryConnectionEstablishment = false
let httpClient = HTTPClient(eventLoopGroupProvider: .shared(self.clientGroup),
configuration: config)
defer {
Expand Down Expand Up @@ -85,6 +86,7 @@ class HTTPClientNIOTSTests: XCTestCase {
let httpBin = HTTPBin(.http1_1(ssl: false))
var config = HTTPClient.Configuration()
config.networkFrameworkWaitForConnectivity = false
config.connectionPool.retryConnectionEstablishment = false

let httpClient = HTTPClient(eventLoopGroupProvider: .shared(self.clientGroup),
configuration: config)
Expand Down Expand Up @@ -140,6 +142,7 @@ class HTTPClientNIOTSTests: XCTestCase {

var clientConfig = HTTPClient.Configuration(tlsConfiguration: tlsConfig)
clientConfig.networkFrameworkWaitForConnectivity = false
clientConfig.connectionPool.retryConnectionEstablishment = false
let httpClient = HTTPClient(
eventLoopGroupProvider: .shared(self.clientGroup),
configuration: clientConfig
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ extension HTTPConnectionPool_HTTP1StateMachineTests {
static var allTests: [(String, (HTTPConnectionPool_HTTP1StateMachineTests) -> () throws -> Void)] {
return [
("testCreatingAndFailingConnections", testCreatingAndFailingConnections),
("testCreatingAndFailingConnectionsWithoutRetry", testCreatingAndFailingConnectionsWithoutRetry),
("testConnectionFailureBackoff", testConnectionFailureBackoff),
("testCancelRequestWorks", testCancelRequestWorks),
("testExecuteOnShuttingDownPool", testExecuteOnShuttingDownPool),
Expand Down
Loading