ConnectionManager.swift 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169
  1. /*
  2. * Copyright 2020, gRPC Authors All rights reserved.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. import Foundation
  17. import Logging
  18. import NIOConcurrencyHelpers
  19. import NIOCore
  20. import NIOHTTP2
  21. // Unchecked because mutable state is always accessed and modified on a particular event loop.
  22. // APIs which _may_ be called from different threads execute onto the correct event loop first.
  23. // APIs which _must_ be called from an exact event loop have preconditions checking that the correct
  24. // event loop is being used.
  25. @usableFromInline
  26. internal final class ConnectionManager: @unchecked Sendable {
  /// What to do when a connection attempt, or an established connection, is lost.
  internal enum Reconnect {
    /// Do not reconnect: `channelInactive()` moves the manager to `shutdown` instead.
    case none
    /// Schedule another connection attempt after the given delay, in seconds.
    case after(TimeInterval)
  }
  /// State held while a connection attempt is in flight.
  internal struct ConnectingState {
    /// Backoff iterator carried from state to state. NOTE(review): presumably it yields the delay
    /// for subsequent attempts; its use is not visible in this part of the file.
    var backoffIterator: ConnectionBackoffIterator?
    /// Whether, and after what delay, to try again if this attempt is lost.
    var reconnect: Reconnect
    /// The first error reported via `channelError(_:)` while connecting, if any.
    var connectError: Error?
    /// The channel being established.
    var candidate: EventLoopFuture<Channel>
    /// Vended to callers which wait for connectivity. It is kept (not failed) when an attempt is
    /// lost and a reconnect is scheduled.
    var readyChannelMuxPromise: EventLoopPromise<HTTP2StreamMultiplexer>
    /// Vended to fast-failing callers. Succeeded when the channel becomes active and failed when
    /// this attempt is lost.
    var candidateMuxPromise: EventLoopPromise<HTTP2StreamMultiplexer>
  }
  /// State for a channel which has become active but has not yet been promoted to 'ready'.
  internal struct ConnectedState {
    var backoffIterator: ConnectionBackoffIterator?
    var reconnect: Reconnect
    var candidate: Channel
    var readyChannelMuxPromise: EventLoopPromise<HTTP2StreamMultiplexer>
    var multiplexer: HTTP2StreamMultiplexer
    /// The most recent error caught on the channel, if any. Not inherited from the connecting
    /// state: it starts out as `nil`.
    var error: Error?

    /// Builds the connected state from the connecting state it replaces plus the channel and
    /// multiplexer which just became active.
    init(from state: ConnectingState, candidate: Channel, multiplexer: HTTP2StreamMultiplexer) {
      // Values belonging to the newly active channel.
      self.candidate = candidate
      self.multiplexer = multiplexer

      // Values carried over from the connection attempt.
      self.readyChannelMuxPromise = state.readyChannelMuxPromise
      self.reconnect = state.reconnect
      self.backoffIterator = state.backoffIterator
    }
  }
  /// State for a channel which is ready to be used for RPCs.
  internal struct ReadyState {
    var channel: Channel
    var multiplexer: HTTP2StreamMultiplexer
    /// The most recent error caught on the channel, if any; starts out as `nil`.
    var error: Error?

    /// Promotes a connected (active) state: its candidate channel becomes the ready channel.
    init(from state: ConnectedState) {
      self.multiplexer = state.multiplexer
      self.channel = state.candidate
    }
  }
  /// State held while waiting for a scheduled reconnection attempt.
  internal struct TransientFailureState {
    var backoffIterator: ConnectionBackoffIterator?
    var readyChannelMuxPromise: EventLoopPromise<HTTP2StreamMultiplexer>
    /// The scheduled task which will start the next connection attempt.
    var scheduled: Scheduled<Void>
    /// Why we are in this state.
    var reason: Error

    /// Returns `error` if there is one, otherwise a generic 'unavailable' status.
    private static func reasonOrDefault(_ error: Error?) -> Error {
      return error ?? GRPCStatus(code: .unavailable, message: "Unexpected connection drop")
    }

    /// Transition from 'connecting': the caller supplies the reason, if it has one.
    init(from state: ConnectingState, scheduled: Scheduled<Void>, reason: Error?) {
      self.backoffIterator = state.backoffIterator
      self.readyChannelMuxPromise = state.readyChannelMuxPromise
      self.scheduled = scheduled
      self.reason = Self.reasonOrDefault(reason)
    }

    /// Transition from 'active': the reason is the last error recorded on the channel, if any.
    init(from state: ConnectedState, scheduled: Scheduled<Void>) {
      self.backoffIterator = state.backoffIterator
      self.readyChannelMuxPromise = state.readyChannelMuxPromise
      self.scheduled = scheduled
      self.reason = Self.reasonOrDefault(state.error)
    }

    /// Transition from 'ready': the ready state holds no promise, so a fresh one is created on the
    /// channel's event loop. The reason is the last error recorded on the channel, if any.
    init(
      from state: ReadyState,
      scheduled: Scheduled<Void>,
      backoffIterator: ConnectionBackoffIterator?
    ) {
      self.backoffIterator = backoffIterator
      self.readyChannelMuxPromise = state.channel.eventLoop.makePromise()
      self.scheduled = scheduled
      self.reason = Self.reasonOrDefault(state.error)
    }
  }
  /// State held once the manager has been shut down.
  internal struct ShutdownState {
    /// Future used to complete later shutdown requests made while already shut down.
    var closeFuture: EventLoopFuture<Void>

    /// The reason we are shutdown. Any requests for a `Channel` in this state will be failed with
    /// this error.
    var reason: Error

    init(closeFuture: EventLoopFuture<Void>, reason: Error) {
      self.reason = reason
      self.closeFuture = closeFuture
    }

    /// Makes the shutdown state used when the user asked for the shutdown.
    static func shutdownByUser(closeFuture: EventLoopFuture<Void>) -> ShutdownState {
      let userInitiated = GRPCStatus(
        code: .unavailable,
        message: "Connection was shutdown by the user"
      )
      return ShutdownState(closeFuture: closeFuture, reason: userInitiated)
    }
  }
  /// The internal connectivity state machine.
  internal enum State {
    /// No `Channel` is required.
    ///
    /// Valid next states:
    /// - `connecting`
    /// - `shutdown`
    case idle(lastError: Error?)

    /// We're actively trying to establish a connection.
    ///
    /// Valid next states:
    /// - `active`
    /// - `transientFailure` (if our attempt fails and we're going to try again)
    /// - `shutdown`
    case connecting(ConnectingState)

    /// We've established a `Channel`, it might not be suitable (TLS handshake may fail, etc.).
    /// Our signal to be 'ready' is the initial HTTP/2 SETTINGS frame.
    ///
    /// Valid next states:
    /// - `ready`
    /// - `transientFailure` (if our handshake fails or another error happens and we can attempt
    ///   to re-establish the connection)
    /// - `shutdown`
    case active(ConnectedState)

    /// We have an active `Channel` which has seen the initial HTTP/2 SETTINGS frame. We can use
    /// the channel for making RPCs.
    ///
    /// Valid next states:
    /// - `idle` (we're not serving any RPCs, we can drop the connection for now)
    /// - `transientFailure` (we encountered an error and will re-establish the connection)
    /// - `shutdown`
    case ready(ReadyState)

    /// A `Channel` is desired, we'll attempt to create one in the future.
    ///
    /// Valid next states:
    /// - `connecting`
    /// - `shutdown`
    case transientFailure(TransientFailureState)

    /// We never want another `Channel`: this state is terminal.
    case shutdown(ShutdownState)

    /// A short name for the state, used as log metadata.
    fileprivate var label: String {
      switch self {
      case .idle: return "idle"
      case .connecting: return "connecting"
      case .active: return "active"
      case .ready: return "ready"
      case .transientFailure: return "transientFailure"
      case .shutdown: return "shutdown"
      }
    }
  }
  /// The last 'external' state we are in, a subset of the internal state.
  ///
  /// Starts as `.idle(nil)`; in the visible part of this file it is only written by
  /// `updateExternalState(to:)`.
  private var externalState: _ConnectivityState = .idle(nil)
  /// Moves the external state to `nextState` and tells the connectivity delegate about it.
  /// Nothing happens when `nextState` is the same state as the current external state.
  private func updateExternalState(to nextState: _ConnectivityState) {
    guard !self.externalState.isSameState(as: nextState) else {
      return
    }

    let previousState = self.externalState
    self.externalState = nextState
    self.connectivityDelegate?.connectionStateDidChange(self, from: previousState, to: nextState)
  }
  /// Our current state. Every assignment is mirrored onto the external state; entering `idle` or
  /// `transientFailure` also bumps the connection ID.
  private var state: State {
    didSet {
      switch self.state {
      case .active:
        // This is an internal state: there is no external equivalent to report.
        break

      case .connecting:
        self.updateExternalState(to: .connecting)

      case .ready:
        self.updateExternalState(to: .ready)

      case .shutdown:
        self.updateExternalState(to: .shutdown)

      case let .idle(lastError):
        self.updateExternalState(to: .idle(lastError))
        self.updateConnectionID()

      case let .transientFailure(failure):
        self.updateExternalState(to: .transientFailure(failure.reason))
        self.updateConnectionID()
      }
    }
  }
  /// Whether the current state is 'idle'. Asserts that it is read on the event loop.
  private var isIdle: Bool {
    self.eventLoop.assertInEventLoop()
    if case .idle = self.state {
      return true
    }
    return false
  }
  /// Whether the current state is 'shutdown'. Asserts that it is read on the event loop.
  private var isShutdown: Bool {
    self.eventLoop.assertInEventLoop()
    if case .shutdown = self.state {
      return true
    }
    return false
  }
  /// The `HTTP2StreamMultiplexer` of the 'ready' state, or `nil` in every other state. Asserts
  /// that it is read on the event loop.
  private var multiplexer: HTTP2StreamMultiplexer? {
    self.eventLoop.assertInEventLoop()
    guard case let .ready(ready) = self.state else {
      return nil
    }
    return ready.multiplexer
  }
  /// The `EventLoop` that the managed connection will run on.
  internal let eventLoop: EventLoop

  /// A delegate for connectivity changes. Executed on the `EventLoop`.
  private var connectivityDelegate: ConnectionManagerConnectivityDelegate?

  /// A delegate for HTTP/2 connection changes. Executed on the `EventLoop`.
  private var http2Delegate: ConnectionManagerHTTP2Delegate?

  /// An `EventLoopFuture<Channel>` provider.
  private let channelProvider: ConnectionManagerChannelProvider

  /// The behavior for starting a call, i.e. how patient is the caller when asking for a
  /// multiplexer.
  private let callStartBehavior: CallStartBehavior.Behavior

  /// The configuration to use when backing off between connection attempts, if reconnection
  /// attempts should be made at all.
  private let connectionBackoff: ConnectionBackoff?

  /// A logger.
  internal var logger: Logger

  /// Identifier of this manager; the first half of the connection ID attached to log metadata.
  private let connectionID: String

  /// Counter forming the second half of the connection ID; bumped by `updateConnectionID()`.
  /// Read and written while holding `channelNumberLock`.
  private var channelNumber: UInt64

  /// Guards `channelNumber`. Declared `let` so the lock itself can never be replaced, which would
  /// silently break mutual exclusion.
  private let channelNumberLock = NIOLock()
  /// The connection ID in "<connectionID>/<channelNumber>" form. Takes no lock itself: the
  /// callers in this file read it while holding `channelNumberLock`.
  private var _connectionIDAndNumber: String {
    "\(self.connectionID)/\(self.channelNumber)"
  }
  /// The connection ID in "<connectionID>/<channelNumber>" form, read under `channelNumberLock`.
  private var connectionIDAndNumber: String {
    self.channelNumberLock.withLock { self._connectionIDAndNumber }
  }
  /// Bumps the channel number (wrapping on overflow) and stores the refreshed connection ID in
  /// the logger's metadata, all while holding `channelNumberLock`.
  private func updateConnectionID() {
    self.channelNumberLock.withLock {
      self.channelNumber &+= 1
      let refreshedID = self._connectionIDAndNumber
      self.logger[metadataKey: MetadataKey.connectionID] = "\(refreshedID)"
    }
  }
  /// Stores the current connection ID in the metadata of the given logger.
  ///
  /// - Parameter logger: The logger to annotate.
  internal func appendMetadata(to logger: inout Logger) {
    let currentID = self.connectionIDAndNumber
    logger[metadataKey: MetadataKey.connectionID] = "\(currentID)"
  }
  /// Creates a manager from a client connection configuration.
  ///
  /// - Parameters:
  ///   - configuration: Supplies the event loop group, call start behavior and connection backoff.
  ///   - channelProvider: The channel provider to use; when `nil` a `DefaultChannelProvider` is
  ///     created from `configuration`.
  ///   - connectivityDelegate: A delegate for connectivity changes, if any.
  ///   - logger: The logger to use.
  internal convenience init(
    configuration: ClientConnection.Configuration,
    channelProvider: ConnectionManagerChannelProvider? = nil,
    connectivityDelegate: ConnectionManagerConnectivityDelegate?,
    logger: Logger
  ) {
    let eventLoop = configuration.eventLoopGroup.next()
    let provider: ConnectionManagerChannelProvider =
      channelProvider ?? DefaultChannelProvider(configuration: configuration)

    // No HTTP/2 delegate is used on this path.
    self.init(
      eventLoop: eventLoop,
      channelProvider: provider,
      callStartBehavior: configuration.callStartBehavior.wrapped,
      connectionBackoff: configuration.connectionBackoff,
      connectivityDelegate: connectivityDelegate,
      http2Delegate: nil,
      logger: logger
    )
  }
  /// Creates a manager in the 'idle' state.
  ///
  /// A fresh UUID becomes the connection ID and the channel number starts at zero; the pair is
  /// stored in the logger's metadata.
  internal init(
    eventLoop: EventLoop,
    channelProvider: ConnectionManagerChannelProvider,
    callStartBehavior: CallStartBehavior.Behavior,
    connectionBackoff: ConnectionBackoff?,
    connectivityDelegate: ConnectionManagerConnectivityDelegate?,
    http2Delegate: ConnectionManagerHTTP2Delegate?,
    logger: Logger
  ) {
    // Identify this manager, then tag a copy of the logger with the identity.
    let identifier = UUID().uuidString
    let initialChannelNumber: UInt64 = 0
    var taggedLogger = logger
    taggedLogger[metadataKey: MetadataKey.connectionID] = "\(identifier)/\(initialChannelNumber)"

    self.connectionID = identifier
    self.channelNumber = initialChannelNumber
    self.logger = taggedLogger

    self.eventLoop = eventLoop
    self.channelProvider = channelProvider
    self.callStartBehavior = callStartBehavior
    self.connectionBackoff = connectionBackoff
    self.connectivityDelegate = connectivityDelegate
    self.http2Delegate = http2Delegate

    self.state = .idle(lastError: nil)
  }
  /// Get the multiplexer from the underlying channel handling gRPC calls.
  ///
  /// With the `fastFailure` call start behavior the fast-failing (optimistic) path is used; with
  /// `waitsForConnectivity` the patient path is used. May be called from any thread: the work is
  /// hopped onto the event loop when necessary.
  internal func getHTTP2Multiplexer() -> EventLoopFuture<HTTP2StreamMultiplexer> {
    // Picks the vending strategy; must run on the event loop.
    func vendMultiplexer() -> EventLoopFuture<HTTP2StreamMultiplexer> {
      switch self.callStartBehavior {
      case .fastFailure:
        return self.getHTTP2MultiplexerOptimistic()
      case .waitsForConnectivity:
        return self.getHTTP2MultiplexerPatient()
      }
    }

    guard self.eventLoop.inEventLoop else {
      return self.eventLoop.flatSubmit {
        vendMultiplexer()
      }
    }
    return vendMultiplexer()
  }
  /// Returns a future for the multiplexer which is succeeded when the connection is ready.
  /// Reconnects are handled if necessary.
  ///
  /// - 'idle': a connection attempt is started and the future of its ready promise is returned.
  /// - 'connecting', 'active', 'transientFailure': the future of the pending ready promise.
  /// - 'ready': an already succeeded future.
  /// - 'shutdown': a future failed with the shutdown reason.
  ///
  /// Must be called on the `EventLoop`.
  private func getHTTP2MultiplexerPatient() -> EventLoopFuture<HTTP2StreamMultiplexer> {
    // `getHTTP2Multiplexer` makes sure we're on the event loop; enforce it here too since we
    // read and mutate event-loop-confined state (as the optimistic variant already does).
    self.eventLoop.preconditionInEventLoop()

    let multiplexer: EventLoopFuture<HTTP2StreamMultiplexer>

    switch self.state {
    case .idle:
      self.startConnecting()
      // We started connecting so we must transition to the `connecting` state.
      guard case let .connecting(connecting) = self.state else {
        self.unreachableState()
      }
      multiplexer = connecting.readyChannelMuxPromise.futureResult

    case let .connecting(state):
      multiplexer = state.readyChannelMuxPromise.futureResult

    case let .active(state):
      multiplexer = state.readyChannelMuxPromise.futureResult

    case let .ready(state):
      multiplexer = self.eventLoop.makeSucceededFuture(state.multiplexer)

    case let .transientFailure(state):
      multiplexer = state.readyChannelMuxPromise.futureResult

    case let .shutdown(state):
      multiplexer = self.eventLoop.makeFailedFuture(state.reason)
    }

    // Logged after the switch so the label reflects any transition made above.
    self.logger.debug(
      "vending multiplexer future",
      metadata: [
        "connectivity_state": "\(self.state.label)"
      ]
    )

    return multiplexer
  }
  /// Returns a future for the current HTTP/2 stream multiplexer, or the future multiplexer of
  /// the connection attempt in progress. In the 'idle' state a connection attempt is started and
  /// the future for that attempt is returned.
  ///
  /// Note: if the state is 'transientFailure' or 'shutdown' then a failed future will be returned.
  private func getHTTP2MultiplexerOptimistic() -> EventLoopFuture<HTTP2StreamMultiplexer> {
    // `getHTTP2Multiplexer` makes sure we're on the event loop but let's just be sure.
    self.eventLoop.preconditionInEventLoop()

    let muxFuture: EventLoopFuture<HTTP2StreamMultiplexer>

    switch self.state {
    case .idle:
      self.startConnecting()
      // We started connecting so we must transition to the `connecting` state.
      guard case let .connecting(attempt) = self.state else {
        self.unreachableState()
      }
      muxFuture = attempt.candidateMuxPromise.futureResult

    case let .connecting(attempt):
      muxFuture = attempt.candidateMuxPromise.futureResult

    case let .active(connected):
      muxFuture = self.eventLoop.makeSucceededFuture(connected.multiplexer)

    case let .ready(ready):
      muxFuture = self.eventLoop.makeSucceededFuture(ready.multiplexer)

    case let .transientFailure(failure):
      muxFuture = self.eventLoop.makeFailedFuture(failure.reason)

    case let .shutdown(shutdown):
      muxFuture = self.eventLoop.makeFailedFuture(shutdown.reason)
    }

    // Logged after the switch so the label reflects any transition made above.
    self.logger.debug(
      "vending fast-failing multiplexer future",
      metadata: [
        "connectivity_state": "\(self.state.label)"
      ]
    )

    return muxFuture
  }
  /// How the underlying connection should be closed when shutting down.
  @usableFromInline
  internal enum ShutdownMode {
    /// Closes the underlying channel without waiting for existing RPCs to complete.
    case forceful
    /// Allows running RPCs to run their course before closing the underlying channel. No new
    /// streams may be created. If the channel has not closed by the associated deadline it is
    /// closed forcibly.
    case graceful(NIODeadline)
  }
  /// Shutdown the underlying connection.
  ///
  /// - Note: Initiating a `forceful` shutdown after a `graceful` shutdown has no effect.
  /// - Parameter mode: How the connection should be closed.
  /// - Returns: A future completed when the shutdown has completed.
  internal func shutdown(mode: ShutdownMode) -> EventLoopFuture<Void> {
    let shutdownPromise: EventLoopPromise<Void> = self.eventLoop.makePromise()
    self.shutdown(mode: mode, promise: shutdownPromise)
    return shutdownPromise.futureResult
  }
  /// Shutdown the underlying connection, completing `promise` when done. May be called from any
  /// thread: the work is hopped onto the event loop when necessary.
  ///
  /// - Note: Initiating a `forceful` shutdown after a `graceful` shutdown has no effect.
  internal func shutdown(mode: ShutdownMode, promise: EventLoopPromise<Void>) {
    guard self.eventLoop.inEventLoop else {
      self.eventLoop.execute {
        self._shutdown(mode: mode, promise: promise)
      }
      return
    }
    self._shutdown(mode: mode, promise: promise)
  }
  /// Moves the manager to the 'shutdown' state and completes `promise` once any existing channel
  /// has closed. What needs tearing down depends on the current state; see each case below.
  ///
  /// - Parameters:
  ///   - mode: Only consulted in the 'ready' state, where it selects between closing immediately
  ///     and quiescing with a deadline.
  ///   - promise: Completed when the shutdown has finished.
  private func _shutdown(mode: ShutdownMode, promise: EventLoopPromise<Void>) {
    self.logger.debug(
      "shutting down connection",
      metadata: [
        "connectivity_state": "\(self.state.label)",
        "shutdown.mode": "\(mode)",
      ]
    )

    switch self.state {
    // We don't have a channel and we don't want one, easy!
    case .idle:
      let shutdown: ShutdownState = .shutdownByUser(closeFuture: promise.futureResult)
      self.state = .shutdown(shutdown)
      promise.succeed(())

    // We're mid-connection: the application doesn't have any 'ready' channels so we'll succeed
    // the shutdown future and deal with any fallout from the connecting channel without the
    // application knowing.
    case let .connecting(state):
      let shutdown: ShutdownState = .shutdownByUser(closeFuture: promise.futureResult)
      self.state = .shutdown(shutdown)

      // Fail the ready channel mux promise: we're shutting down so even if we manage to successfully
      // connect the application shouldn't have access to the channel or multiplexer.
      state.readyChannelMuxPromise.fail(GRPCStatus(code: .unavailable, message: nil))
      state.candidateMuxPromise.fail(GRPCStatus(code: .unavailable, message: nil))

      // Complete the shutdown promise when the connection attempt has completed.
      state.candidate.whenComplete {
        switch $0 {
        case let .success(channel):
          // In case we do successfully connect, close on the next loop tick. When connecting a
          // channel NIO will complete the promise for the channel before firing channel active.
          // That means we may close and fire inactive before active which HTTP/2 will be unhappy
          // about.
          self.eventLoop.execute {
            channel.close(mode: .all, promise: nil)
            promise.completeWith(channel.closeFuture.recoveringFromUncleanShutdown())
          }

        case .failure:
          // We failed to connect, that's fine we still shutdown successfully.
          promise.succeed(())
        }
      }

    // We have an active channel but the application doesn't know about it yet. We'll do the same
    // as for `.connecting`.
    case let .active(state):
      let shutdown: ShutdownState = .shutdownByUser(closeFuture: promise.futureResult)
      self.state = .shutdown(shutdown)

      // Fail the ready channel mux promise: we're shutting down so even if we manage to successfully
      // connect the application shouldn't have access to the channel or multiplexer.
      state.readyChannelMuxPromise.fail(GRPCStatus(code: .unavailable, message: nil))
      // We have a channel, close it. We only create streams in the ready state so there's no need
      // to quiesce here.
      state.candidate.close(mode: .all, promise: nil)
      promise.completeWith(state.candidate.closeFuture.recoveringFromUncleanShutdown())

    // The channel is up and running: the application could be using it. We can close it and
    // return the `closeFuture`.
    case let .ready(state):
      let shutdown: ShutdownState = .shutdownByUser(closeFuture: promise.futureResult)
      self.state = .shutdown(shutdown)

      switch mode {
      case .forceful:
        // We have a channel, close it.
        state.channel.close(mode: .all, promise: nil)

      case let .graceful(deadline):
        // If we don't close by the deadline forcibly close the channel.
        let scheduledForceClose = state.channel.eventLoop.scheduleTask(deadline: deadline) {
          self.logger.info("shutdown timer expired, forcibly closing connection")
          state.channel.close(mode: .all, promise: nil)
        }

        // Cancel the force close if we close normally first.
        state.channel.closeFuture.whenComplete { _ in
          scheduledForceClose.cancel()
        }

        // Tell the channel to quiesce. It will be picked up by the idle handler which will close
        // the channel when all streams have been closed.
        state.channel.pipeline.fireUserInboundEventTriggered(ChannelShouldQuiesceEvent())
      }

      // Complete the promise when we eventually close.
      promise.completeWith(state.channel.closeFuture.recoveringFromUncleanShutdown())

    // Like `.connecting` and `.active` the application does not have a `.ready` channel. We'll
    // do the same but also cancel any scheduled connection attempts and deal with any fallout
    // if we cancelled too late.
    case let .transientFailure(state):
      let shutdown: ShutdownState = .shutdownByUser(closeFuture: promise.futureResult)
      self.state = .shutdown(shutdown)

      // Stop the creation of a new channel, if we can. If we can't then the task to
      // `startConnecting()` will see our new `shutdown` state and ignore the request to connect.
      state.scheduled.cancel()

      // Fail the ready channel mux promise: we're shutting down so even if we manage to successfully
      // connect the application shouldn't have access to the channel.
      state.readyChannelMuxPromise.fail(shutdown.reason)

      // No active channel, so complete the shutdown promise now.
      promise.succeed(())

    // We're already shutdown; there's nothing to do beyond completing the new promise with the
    // close future recorded by the original shutdown.
    case let .shutdown(state):
      promise.completeWith(state.closeFuture)
    }
  }
  /// Registers a callback which fires when the current active connection is closed.
  ///
  /// If there is a connection, the callback will be invoked with `true` when the connection is
  /// closed. Otherwise the callback is invoked with `false`. May be called from any thread: the
  /// registration is hopped onto the event loop when necessary.
  internal func onCurrentConnectionClose(_ onClose: @escaping (Bool) -> Void) {
    guard self.eventLoop.inEventLoop else {
      self.eventLoop.execute {
        self._onCurrentConnectionClose(onClose)
      }
      return
    }
    self._onCurrentConnectionClose(onClose)
  }
  /// Event-loop half of `onCurrentConnectionClose(_:)`. Only the 'ready' state counts as having
  /// a connection; in every other state `onClose(false)` is invoked straight away.
  private func _onCurrentConnectionClose(_ onClose: @escaping (Bool) -> Void) {
    self.eventLoop.assertInEventLoop()

    guard case let .ready(ready) = self.state else {
      onClose(false)
      return
    }

    ready.channel.closeFuture.whenComplete { _ in
      onClose(true)
    }
  }
  551. // MARK: - State changes from the channel handler.
  /// The channel caught an error. Hold on to it until the channel becomes inactive, it may provide
  /// some context. Must be called on the `EventLoop`.
  internal func channelError(_ error: Error) {
    self.eventLoop.preconditionInEventLoop()

    switch self.state {
    case .idle:
      // Hitting an error in idle is a surprise, but not really something we do anything about.
      // Either the error is channel fatal, in which case we'll see channelInactive soon
      // (acceptable), or it's not, and future I/O will either fail fast or work. In either case,
      // all we do is log this and move on.
      self.logger.warning(
        "ignoring unexpected error in idle",
        metadata: [
          MetadataKey.error: "\(error)"
        ]
      )

    case .connecting(var connecting):
      // Only the first error seen while connecting is recorded. The channel promise will also
      // notify the manager of any error which occurs while connecting; this one may take
      // precedence if it carries more relevant information.
      guard connecting.connectError == nil else {
        break
      }
      connecting.connectError = error
      self.state = .connecting(connecting)

      // The pool is only notified of connection errors when the connection transitions to the
      // transient failure state. However, in some cases (i.e. with NIOTS), errors can be thrown
      // during the connect but before the connect times out.
      //
      // This opens up a period of time where you can start a call and have it fail with
      // deadline exceeded (because no connection was available within the configured max
      // wait time for the pool) but without any diagnostic information. The information is
      // available but it hasn't been made available to the pool at that point in time.
      //
      // The delegate can't easily be modified (it's public API) and a new API doesn't make all
      // that much sense so we elect to check whether the delegate is the pool and call it
      // directly.
      if let pool = self.connectivityDelegate as? ConnectionPool {
        pool.sync.updateMostRecentError(error)
      }

    case .active(var connected):
      connected.error = error
      self.state = .active(connected)

    case .ready(var ready):
      ready.error = error
      self.state = .ready(ready)

    case .transientFailure, .shutdown:
      // If we're already in one of these states, then additional errors aren't helpful to us.
      break
    }
  }
  /// The connecting channel became `active`. Must be called on the `EventLoop`.
  ///
  /// - Parameters:
  ///   - channel: The channel which became active.
  ///   - multiplexer: The HTTP/2 stream multiplexer of that channel.
  internal func channelActive(channel: Channel, multiplexer: HTTP2StreamMultiplexer) {
    self.eventLoop.preconditionInEventLoop()
    self.logger.debug(
      "activating connection",
      metadata: [
        "connectivity_state": "\(self.state.label)"
      ]
    )

    switch self.state {
    case let .connecting(pending):
      self.state = .active(
        ConnectedState(from: pending, candidate: channel, multiplexer: multiplexer)
      )
      // Optimistic connections are happy with this level of setup.
      pending.candidateMuxPromise.succeed(multiplexer)

    case .shutdown:
      // Application called shutdown before the channel became active; we should close it.
      channel.close(mode: .all, promise: nil)

    case .idle, .transientFailure, .active, .ready:
      // Nothing to do. In `idle`/`transientFailure` we received a channelActive when not
      // connecting, which can happen if channelActive and channelInactive are reordered. In
      // `active`/`ready` this is a second 'channelActive' for an already active channel.
      break
    }
  }
  /// Handles the channel becoming inactive: depending on the current state this either shuts the
  /// manager down or schedules another connection attempt.
  ///
  /// Must be called on the `EventLoop`.
  internal func channelInactive() {
    self.eventLoop.preconditionInEventLoop()
    self.logger.debug(
      "deactivating connection",
      metadata: ["connectivity_state": "\(self.state.label)"]
    )

    // Moves to `.shutdown` because reconnecting is disabled and hands back the status describing
    // why, so the caller can fail whichever promises it still holds.
    func shutDownWithoutReconnecting() -> GRPCStatus {
      self.logger.debug("shutting down connection")
      let status = GRPCStatus(
        code: .unavailable,
        message: "The connection was dropped and connection re-establishment is disabled"
      )
      self.state = .shutdown(
        ShutdownState(closeFuture: self.eventLoop.makeSucceededFuture(()), reason: status)
      )
      return status
    }

    switch self.state {
    case .connecting(let attempt):
      // Seeing 'channelInactive' before 'channelActive' is uncommon but tolerated.
      if case .after(let delay) = attempt.reconnect {
        // Reconnect after a delay. Only the candidate mux promise is failed here; the
        // 'readyChannelMuxPromise' is carried over into the next attempt.
        let status = GRPCStatus(code: .unavailable, message: "Connection closed while connecting")
        attempt.candidateMuxPromise.fail(status)
        let retry = self.eventLoop.scheduleTask(in: .seconds(timeInterval: delay)) {
          self.startConnecting()
        }
        self.logger.debug("scheduling connection attempt", metadata: ["delay_secs": "\(delay)"])
        self.state = .transientFailure(
          TransientFailureState(from: attempt, scheduled: retry, reason: nil)
        )
      } else {
        // No reconnect: shut down, then fail both outstanding promises.
        let status = shutDownWithoutReconnecting()
        attempt.readyChannelMuxPromise.fail(status)
        attempt.candidateMuxPromise.fail(status)
      }

    case .active(let established):
      // The channel was `active` but never became `ready`.
      if case .after(let delay) = established.reconnect {
        let retry = self.eventLoop.scheduleTask(in: .seconds(timeInterval: delay)) {
          self.startConnecting()
        }
        self.logger.debug("scheduling connection attempt", metadata: ["delay_secs": "\(delay)"])
        self.state = .transientFailure(
          TransientFailureState(from: established, scheduled: retry)
        )
      } else {
        let status = shutDownWithoutReconnecting()
        established.readyChannelMuxPromise.fail(status)
      }

    case .ready(let established):
      // The channel was ready and something went wrong; replace it only if a backoff is
      // configured.
      if self.connectionBackoff != nil {
        // Reconnect straight away, but still pass through `transientFailure`.
        let retry = self.eventLoop.scheduleTask(in: .nanoseconds(0)) {
          self.startConnecting()
        }
        self.logger.debug("scheduling connection attempt", metadata: ["delay": "0"])
        let freshIterator = self.connectionBackoff?.makeIterator()
        self.state = .transientFailure(
          TransientFailureState(
            from: established,
            scheduled: retry,
            backoffIterator: freshIterator
          )
        )
      } else {
        self.logger.debug("shutting down connection, no reconnect configured/remaining")
        let status = GRPCStatus(
          code: .unavailable,
          message: "The connection was dropped and a reconnect was not configured"
        )
        self.state = .shutdown(
          ShutdownState(closeFuture: established.channel.closeFuture, reason: status)
        )
      }

    case .idle, .shutdown, .transientFailure:
      // Nothing to do:
      // - idle: the channel is expected to become inactive after idling,
      // - shutdown: already shut down,
      // - transientFailure: 'channelInactive' was received twice.
      break
    }
  }
  /// Marks the connection as ready, i.e. the initial HTTP/2 SETTINGS frame has been seen.
  ///
  /// Only an `active` connection can become ready: the state moves to `ready` and the ready
  /// multiplexer promise is then succeeded. Must be called on the `EventLoop`.
  internal func ready() {
    self.eventLoop.preconditionInEventLoop()
    self.logger.debug(
      "connection ready",
      metadata: ["connectivity_state": "\(self.state.label)"]
    )

    switch self.state {
    case .active(let established):
      // Change state first, then complete the promise.
      self.state = .ready(ReadyState(from: established))
      established.readyChannelMuxPromise.succeed(established.multiplexer)

    case .shutdown:
      break

    case .idle, .transientFailure, .connecting, .ready:
      // Either there is no established channel which could have received the SETTINGS frame
      // (idle, transientFailure, connecting) or one was already received (ready). Trap in debug
      // builds; in release builds there is nothing to close and nowhere to fire an error to.
      assertionFailure("received initial HTTP/2 SETTINGS frame in \(self.state.label) state")
    }
  }
  /// Moves an established connection to the idle state because no RPCs are active on it.
  ///
  /// Must be called on the `EventLoop`.
  internal func idle() {
    self.eventLoop.preconditionInEventLoop()
    self.logger.debug(
      "idling connection",
      metadata: ["connectivity_state": "\(self.state.label)"]
    )

    switch self.state {
    case .ready(let established):
      self.state = .idle(lastError: established.error)

    case .active(let established):
      // Reachable if the keepalive timer fires before the connection becomes ready; anyone
      // waiting for a ready multiplexer is told that it isn't going to happen.
      self.state = .idle(lastError: established.error)
      let status = GRPCStatus(code: .unavailable, message: "Idled before reaching ready state")
      established.readyChannelMuxPromise.fail(status)

    case .shutdown, .idle, .transientFailure:
      // shutdown: expected when the user closed the connection; if the channel becomes inactive
      // with no outstanding RPCs then 'idle()' is called instead of 'channelInactive()'.
      // idle/transientFailure: there is no connection to idle.
      break

    case .connecting:
      // The idle watchdog only starts once the connection is active so this shouldn't happen.
      // Ignored in release builds.
      assertionFailure("tried to idle a connection in the \(self.state.label) state")
    }
  }
  /// Tells the HTTP/2 delegate, if there is one, that a stream was opened on this connection.
  internal func streamOpened() {
    self.eventLoop.assertInEventLoop()
    guard let delegate = self.http2Delegate else {
      return
    }
    delegate.streamOpened(self)
  }
  /// Tells the HTTP/2 delegate, if there is one, that a stream was closed on this connection.
  internal func streamClosed() {
    self.eventLoop.assertInEventLoop()
    guard let delegate = self.http2Delegate else {
      return
    }
    delegate.streamClosed(self)
  }
  /// Forwards a new 'max concurrent streams' value to the HTTP/2 delegate, if there is one.
  ///
  /// - Parameter maxConcurrentStreams: The value passed on to the delegate.
  internal func maxConcurrentStreamsChanged(_ maxConcurrentStreams: Int) {
    self.eventLoop.assertInEventLoop()
    guard let delegate = self.http2Delegate else {
      return
    }
    delegate.receivedSettingsMaxConcurrentStreams(self, maxConcurrentStreams: maxConcurrentStreams)
  }
  /// Tells the connectivity delegate, if there is one, that this connection has started
  /// quiescing.
  internal func beginQuiescing() {
    self.eventLoop.assertInEventLoop()
    guard let delegate = self.connectivityDelegate else {
      return
    }
    delegate.connectionIsQuiescing(self)
  }
  817. }
  extension ConnectionManager {
    /// Handles a connection attempt which failed before a connection was ever established.
    ///
    /// While `connecting` this either shuts down or schedules the next attempt, depending on the
    /// attempt's `reconnect` value. Must be called on the `EventLoop`.
    ///
    /// - Parameter error: The error the connection attempt failed with.
    private func connectionFailed(withError error: Error) {
      self.eventLoop.preconditionInEventLoop()

      switch self.state {
      case .connecting(let attempt):
        // For a connect timeout prefer any error recorded earlier in the attempt: it is likely
        // to be more useful than the timeout itself.
        let reportedError: Error
        if case .some(.connectTimeout) = error as? ChannelError {
          reportedError = attempt.connectError ?? error
        } else {
          reportedError = error
        }

        if case .after(let delay) = attempt.reconnect {
          // Try again after a delay.
          self.logger.debug("scheduling connection attempt", metadata: ["delay": "\(delay)"])
          let retry = self.eventLoop.scheduleTask(in: .seconds(timeInterval: delay)) {
            self.startConnecting()
          }
          self.state = .transientFailure(
            TransientFailureState(from: attempt, scheduled: retry, reason: reportedError)
          )
          // Users of the candidate multiplexer are not willing to wait for another attempt.
          attempt.candidateMuxPromise.fail(reportedError)
        } else {
          // No reconnect: shut down, then fail everyone who is waiting.
          self.logger.debug("shutting down connection, no reconnect configured/remaining")
          let closed = self.eventLoop.makeSucceededFuture(())
          self.state = .shutdown(ShutdownState(closeFuture: closed, reason: reportedError))
          attempt.readyChannelMuxPromise.fail(reportedError)
          attempt.candidateMuxPromise.fail(reportedError)
        }

      case .shutdown:
        // The application must have shut down while an attempt which was going to fail anyway
        // was in flight. Nothing to do.
        break

      case .idle, .active, .ready, .transientFailure:
        // A failure was reported but no attempt is in progress; all we can do in release builds
        // is ignore it.
        assertionFailure("connect promise failed in \(self.state.label) state")
      }
    }
  }
  extension ConnectionManager {
    /// Starts establishing a connection. This is only valid from the `idle` and
    /// `transientFailure` states (and is a no-op when already `shutdown`).
    ///
    /// Must be called on the `EventLoop`.
    private func startConnecting() {
      self.eventLoop.assertInEventLoop()

      switch self.state {
      case .idle:
        let freshIterator = self.connectionBackoff?.makeIterator()

        // The iterator yields the connect timeout together with the backoff for the following
        // attempt. When retries are disabled it yields nothing, yet a connect timeout is still
        // needed for this attempt, so take it from the backoff configuration directly.
        let timeoutOverride: TimeAmount? = self.connectionBackoff.flatMap { backoff in
          backoff.retries == .none
            ? TimeAmount.seconds(timeInterval: backoff.minimumConnectionTimeout)
            : nil
        }

        self.startConnecting(
          backoffIterator: freshIterator,
          muxPromise: self.eventLoop.makePromise(),
          connectTimeoutOverride: timeoutOverride
        )

      case .transientFailure(let pending):
        // Carry on with the iterator and promise from the failed attempt.
        self.startConnecting(
          backoffIterator: pending.backoffIterator,
          muxPromise: pending.readyChannelMuxPromise
        )

      case .shutdown:
        // Shut down before a scheduled connection attempt got going.
        break

      case .connecting, .active, .ready:
        // `startConnecting()` is only called when no connection exists and after the state has
        // been checked, so none of these should be reachable.
        self.unreachableState()
      }
    }

    /// Kicks off a connection attempt and moves to the `connecting` state.
    ///
    /// - Parameters:
    ///   - backoffIterator: Supplies the connect timeout for this attempt and the backoff to apply
    ///     should it fail. If it is `nil` or yields no value then a failed attempt won't be
    ///     retried.
    ///   - muxPromise: Stored as the `readyChannelMuxPromise` of the new connecting state.
    ///   - connectTimeoutOverride: When non-`nil`, used as the connect timeout instead of the one
    ///     from `backoffIterator`.
    private func startConnecting(
      backoffIterator: ConnectionBackoffIterator?,
      muxPromise: EventLoopPromise<HTTP2StreamMultiplexer>,
      connectTimeoutOverride: TimeAmount? = nil
    ) {
      let nextTimeoutAndBackoff = backoffIterator?.next()

      // We're already on the event loop; submitting the connect means it starts only after the
      // state has been changed to `.connecting` below.
      self.eventLoop.assertInEventLoop()
      let candidate: EventLoopFuture<Channel> = self.eventLoop.flatSubmit {
        // An explicit override wins over the timeout produced by the iterator.
        let timeout: TimeAmount? = connectTimeoutOverride
          ?? nextTimeoutAndBackoff.map { TimeAmount.seconds(timeInterval: $0.timeout) }

        let futureChannel: EventLoopFuture<Channel> = self.channelProvider.makeChannel(
          managedBy: self,
          onEventLoop: self.eventLoop,
          connectTimeout: timeout,
          logger: self.logger
        )
        futureChannel.whenFailure {
          self.connectionFailed(withError: $0)
        }
        return futureChannel
      }

      // Whether to reconnect should the candidate channel fail.
      let reconnect: Reconnect
      if let next = nextTimeoutAndBackoff {
        reconnect = .after(next.backoff)
      } else {
        reconnect = .none
      }

      self.state = .connecting(
        ConnectingState(
          backoffIterator: backoffIterator,
          reconnect: reconnect,
          candidate: candidate,
          readyChannelMuxPromise: muxPromise,
          candidateMuxPromise: self.eventLoop.makePromise()
        )
      )
    }
  }
  extension ConnectionManager {
    /// A synchronous view over the connection manager. Callers must be executing on the same
    /// `EventLoop` as the connection manager for every operation.
    internal var sync: Sync {
      Sync(self)
    }

    /// Wraps a `ConnectionManager`, exposing operations which must be performed on its
    /// `EventLoop`.
    internal struct Sync {
      private let manager: ConnectionManager

      fileprivate init(_ manager: ConnectionManager) {
        self.manager = manager
      }

      /// The delegate notified about connectivity changes.
      internal var connectivityDelegate: ConnectionManagerConnectivityDelegate? {
        get {
          let manager = self.manager
          manager.eventLoop.assertInEventLoop()
          return manager.connectivityDelegate
        }
        nonmutating set {
          let manager = self.manager
          manager.eventLoop.assertInEventLoop()
          manager.connectivityDelegate = newValue
        }
      }

      /// The delegate notified about HTTP/2 connection changes.
      internal var http2Delegate: ConnectionManagerHTTP2Delegate? {
        get {
          let manager = self.manager
          manager.eventLoop.assertInEventLoop()
          return manager.http2Delegate
        }
        nonmutating set {
          let manager = self.manager
          manager.eventLoop.assertInEventLoop()
          manager.http2Delegate = newValue
        }
      }

      /// Returns `true` if the connection is in the idle state.
      internal var isIdle: Bool {
        self.manager.isIdle
      }

      /// Returns `true` if the connection is in the shutdown state.
      internal var isShutdown: Bool {
        self.manager.isShutdown
      }

      /// Returns the `multiplexer` from a connection in the `ready` state or `nil` if it is in any
      /// other state.
      internal var multiplexer: HTTP2StreamMultiplexer? {
        self.manager.multiplexer
      }

      /// Starts establishing a connection. Must only be called when `isIdle` is `true`.
      internal func startConnecting() {
        self.manager.startConnecting()
      }
    }
  }
  extension ConnectionManager {
    /// Traps because the manager is in a state which should be impossible for the caller.
    ///
    /// - Parameters:
    ///   - function: Name of the calling function, included in the failure message.
    ///   - file: File reported with the failure.
    ///   - line: Line reported with the failure.
    private func unreachableState(
      function: StaticString = #function,
      file: StaticString = #fileID,
      line: UInt = #line
    ) -> Never {
      let currentState = self.state
      fatalError("Invalid state \(currentState) for \(function)", file: file, line: line)
    }
  }