ConnectionManager.swift 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653
  1. /*
  2. * Copyright 2020, gRPC Authors All rights reserved.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. import Foundation
  17. import Logging
  18. import NIO
  19. import NIOConcurrencyHelpers
  20. internal class ConnectionManager {
  /// Data held while the manager is idle, i.e. no `Channel` exists and none is being created.
  internal struct IdleState {
    /// The configuration used when a connection attempt is started from the idle state.
    var configuration: ClientConnection.Configuration
  }
  /// Describes whether another connection attempt should be made after a channel is lost or a
  /// connection attempt fails.
  internal enum Reconnect {
    /// Do not try again.
    case none

    /// Try again once the associated delay has elapsed. The delay is interpreted as seconds by
    /// the code which schedules the retry.
    case after(TimeInterval)
  }
  /// Data held while a connection attempt is in flight.
  internal struct ConnectingState {
    /// The configuration used for this attempt.
    var configuration: ClientConnection.Configuration

    /// Supplies the timeout and backoff for subsequent attempts, if backoff is configured.
    var backoffIterator: ConnectionBackoffIterator?

    /// Whether (and after how long) to try again should this attempt fail.
    var reconnect: Reconnect

    /// Completed with the channel once it becomes ready; failed if we shut down first.
    var readyChannelPromise: EventLoopPromise<Channel>

    /// The channel being established by this attempt.
    var candidate: EventLoopFuture<Channel>
  }
  /// Data held once a `Channel` has been established but is not yet known to be ready.
  internal struct ConnectedState {
    /// The configuration the channel was created with.
    var configuration: ClientConnection.Configuration

    /// Supplies the timeout and backoff for subsequent attempts, if backoff is configured.
    var backoffIterator: ConnectionBackoffIterator?

    /// Whether (and after how long) to try again should this channel go away.
    var reconnect: Reconnect

    /// Completed with `candidate` once it becomes ready.
    var readyChannelPromise: EventLoopPromise<Channel>

    /// The established channel which may yet become ready.
    var candidate: Channel

    /// Carries everything over from the connecting state and records the established channel.
    init(from connecting: ConnectingState, candidate: Channel) {
      self.candidate = candidate
      self.readyChannelPromise = connecting.readyChannelPromise
      self.reconnect = connecting.reconnect
      self.backoffIterator = connecting.backoffIterator
      self.configuration = connecting.configuration
    }
  }
  /// Data held while we have a channel which is ready to be used.
  internal struct ReadyState {
    /// The configuration the channel was created with.
    var configuration: ClientConnection.Configuration

    /// The ready channel.
    var channel: Channel

    /// Promotes the candidate channel of a connected state to the ready channel.
    init(from connected: ConnectedState) {
      self.channel = connected.candidate
      self.configuration = connected.configuration
    }
  }
  /// Data held while we have no channel but intend to create one in the future.
  internal struct TransientFailureState {
    /// The configuration to use for the next attempt.
    var configuration: ClientConnection.Configuration

    /// Supplies the timeout and backoff for the next attempt, if backoff is configured.
    var backoffIterator: ConnectionBackoffIterator?

    /// Promise for the channel which callers are already waiting on.
    var readyChannelPromise: EventLoopPromise<Channel>

    /// The scheduled task which will start the next connection attempt.
    var scheduled: Scheduled<Void>

    /// A connection attempt failed: keep the iterator and promise of that attempt.
    init(from connecting: ConnectingState, scheduled: Scheduled<Void>) {
      self.scheduled = scheduled
      self.readyChannelPromise = connecting.readyChannelPromise
      self.backoffIterator = connecting.backoffIterator
      self.configuration = connecting.configuration
    }

    /// An established (but not ready) channel was lost: keep its iterator and promise.
    init(from connected: ConnectedState, scheduled: Scheduled<Void>) {
      self.scheduled = scheduled
      self.readyChannelPromise = connected.readyChannelPromise
      self.backoffIterator = connected.backoffIterator
      self.configuration = connected.configuration
    }

    /// A ready channel was lost: the ready state carries neither an iterator nor a promise, so a
    /// fresh backoff iterator and a fresh promise are created here.
    init(from ready: ReadyState, scheduled: Scheduled<Void>) {
      self.scheduled = scheduled
      self.readyChannelPromise = ready.channel.eventLoop.makePromise()
      self.backoffIterator = ready.configuration.connectionBackoff?.makeIterator()
      self.configuration = ready.configuration
    }
  }
  /// Data held once the manager has been shut down.
  internal struct ShutdownState {
    /// The future returned to callers of `shutdown()`.
    var closeFuture: EventLoopFuture<Void>
  }
  /// The states a `ConnectionManager` moves between.
  internal enum State {
    /// There is no `Channel` and none is needed right now.
    ///
    /// Valid next states:
    /// - `connecting`
    /// - `shutdown`
    case idle(IdleState)

    /// A connection attempt is in progress.
    ///
    /// Valid next states:
    /// - `active`
    /// - `transientFailure` (the attempt failed and we're going to try again)
    /// - `shutdown`
    case connecting(ConnectingState)

    /// A `Channel` has been established but may still turn out to be unsuitable (the TLS
    /// handshake may fail, etc.). The initial HTTP/2 SETTINGS frame is our signal that the
    /// channel is 'ready'.
    ///
    /// Valid next states:
    /// - `ready`
    /// - `transientFailure` (if our handshake fails or another error happens and we can attempt
    ///   to re-establish the connection)
    /// - `shutdown`
    case active(ConnectedState)

    /// The `Channel` has seen the initial HTTP/2 SETTINGS frame and can be used for RPCs.
    ///
    /// Valid next states:
    /// - `idle` (no RPCs are being served so the connection can be dropped for now)
    /// - `transientFailure` (an error occurred and the connection will be re-established)
    /// - `shutdown`
    case ready(ReadyState)

    /// There is no `Channel` but one is wanted: an attempt to create one is scheduled.
    ///
    /// Valid next states:
    /// - `connecting`
    /// - `shutdown`
    case transientFailure(TransientFailureState)

    /// Terminal state: no further `Channel` will be created.
    case shutdown(ShutdownState)
  }
  /// The current state. Each assignment forwards the corresponding connectivity state to
  /// `monitor`; `.active` is the exception as it is never reported.
  private var state: State {
    didSet {
      switch self.state {
      case .active:
        // This is an internal state: there is nothing to report.
        ()

      case .connecting:
        self.monitor.updateState(to: .connecting, logger: self.logger)

      case .ready:
        self.monitor.updateState(to: .ready, logger: self.logger)

      case .transientFailure:
        self.monitor.updateState(to: .transientFailure, logger: self.logger)

      case .shutdown:
        self.monitor.updateState(to: .shutdown, logger: self.logger)

      case .idle:
        self.monitor.updateState(to: .idle, logger: self.logger)
        // Move on to a new channel number: it identifies the *next* channel we create, and the
        // logger metadata is refreshed to match.
        self.channelNumberLock.withLockVoid { self.channelNumber &+= 1 }
        self.logger[metadataKey: MetadataKey.connectionID] = "\(self.connectionIDAndNumber)"
      }
    }
  }
  /// The event loop on which state transitions are performed.
  internal let eventLoop: EventLoop

  /// Notified of the new connectivity state whenever `state` is set.
  internal let monitor: ConnectivityStateMonitor

  /// Logger whose connection ID metadata is refreshed each time the manager becomes idle.
  internal var logger: Logger

  /// Identifier for this manager, generated once at initialization.
  private let connectionID: String

  /// Incremented (with wrapping) each time the manager becomes idle; combined with
  /// `connectionID` to identify individual channels.
  private var channelNumber: UInt64

  /// Guards `channelNumber`. Declared `let` because the lock is never reassigned: replacing it
  /// would silently break the mutual exclusion it provides.
  private let channelNumberLock = Lock()
  /// The connection ID and the current channel number joined by a "/", read under the lock
  /// which guards the channel number.
  private var connectionIDAndNumber: String {
    return self.channelNumberLock.withLock { "\(self.connectionID)/\(self.channelNumber)" }
  }
  /// Sets the connection ID metadata on `logger` to the current connection ID and channel number.
  ///
  /// - Parameter logger: The logger to annotate in place.
  internal func appendMetadata(to logger: inout Logger) {
    let identifier = self.connectionIDAndNumber
    logger[metadataKey: MetadataKey.connectionID] = "\(identifier)"
  }
  /// Only used for testing: when set, channels are obtained from this closure instead of from a
  /// bootstrap.
  private var channelProvider: (() -> EventLoopFuture<Channel>)?
  /// Creates a manager which establishes real connections (no testing channel provider).
  ///
  /// - Parameters:
  ///   - configuration: The client connection configuration.
  ///   - logger: The logger to derive this manager's logger from.
  internal convenience init(configuration: ClientConnection.Configuration, logger: Logger) {
    self.init(
      configuration: configuration,
      logger: logger,
      channelProvider: nil
    )
  }
  /// Makes a `ConnectionManager` for use in tests: channels are created by calling
  /// `channelProvider`.
  ///
  /// - Parameters:
  ///   - configuration: The client connection configuration.
  ///   - logger: The logger to derive the manager's logger from.
  ///   - channelProvider: Called each time the manager needs a new channel.
  /// - Returns: A manager which uses `channelProvider` to create its channels.
  internal static func testingOnly(
    configuration: ClientConnection.Configuration,
    logger: Logger,
    channelProvider: @escaping () -> EventLoopFuture<Channel>
  ) -> ConnectionManager {
    let manager = ConnectionManager(
      configuration: configuration,
      logger: logger,
      channelProvider: channelProvider
    )
    return manager
  }
  /// Designated initializer: the manager starts out idle.
  ///
  /// - Parameters:
  ///   - configuration: The client connection configuration.
  ///   - logger: Base logger; a copy tagged with the connection ID and channel number is stored.
  ///   - channelProvider: Optional source of channels, used only by tests.
  private init(
    configuration: ClientConnection.Configuration,
    logger: Logger,
    channelProvider: (() -> EventLoopFuture<Channel>)?
  ) {
    // Identify this manager and tag a copy of the logger with the initial identifier.
    let connectionID = UUID().uuidString
    let initialChannelNumber: UInt64 = 0
    var taggedLogger = logger
    taggedLogger[metadataKey: MetadataKey.connectionID] = "\(connectionID)/\(initialChannelNumber)"

    self.connectionID = connectionID
    self.channelNumber = initialChannelNumber
    self.logger = taggedLogger
    self.channelProvider = channelProvider

    self.eventLoop = configuration.eventLoopGroup.next()
    self.monitor = ConnectivityStateMonitor(
      delegate: configuration.connectivityStateDelegate,
      queue: configuration.connectivityStateDelegateQueue
    )
    self.state = .idle(IdleState(configuration: configuration))
  }
  /// Returns a future for a channel which is ready to use, starting a connection attempt first
  /// if the manager is idle.
  ///
  /// - Returns: A future which is already succeeded if a channel is ready, the future of the
  ///   ready-channel promise if a channel is being (or will be) established, or a failed future
  ///   with an `unavailable` status if the manager has been shut down.
  internal func getChannel() -> EventLoopFuture<Channel> {
    return self.eventLoop.flatSubmit {
      // Nothing is in flight when idle: kick off an attempt, which must leave us `connecting`.
      if case .idle = self.state {
        self.startConnecting()
        guard case .connecting = self.state else {
          self.invalidState()
        }
      }

      switch self.state {
      case let .connecting(connecting):
        return connecting.readyChannelPromise.futureResult

      case let .active(active):
        return active.readyChannelPromise.futureResult

      case let .transientFailure(pending):
        return pending.readyChannelPromise.futureResult

      case let .ready(ready):
        return ready.channel.eventLoop.makeSucceededFuture(ready.channel)

      case .shutdown:
        return self.eventLoop.makeFailedFuture(GRPCStatus(code: .unavailable, message: nil))

      case .idle:
        // The idle state was dealt with above.
        self.invalidState()
      }
    }
  }
  /// Returns a future for the channel we currently have or are currently trying to establish,
  /// without waiting for it to become ready. If the manager is idle a connection attempt is
  /// started and the future for that attempt is returned.
  ///
  /// Note: a failed future is returned in the 'transientFailure' and 'shutdown' states.
  internal func getOptimisticChannel() -> EventLoopFuture<Channel> {
    return self.eventLoop.flatSubmit {
      // Nothing is in flight when idle: kick off an attempt, which must leave us `connecting`.
      if case .idle = self.state {
        self.startConnecting()
        guard case .connecting = self.state else {
          self.invalidState()
        }
      }

      switch self.state {
      case let .connecting(connecting):
        return connecting.candidate

      case let .active(active):
        return active.candidate.eventLoop.makeSucceededFuture(active.candidate)

      case let .ready(ready):
        return ready.channel.eventLoop.makeSucceededFuture(ready.channel)

      case .transientFailure:
        return self.eventLoop.makeFailedFuture(ChannelError.ioOnClosedChannel)

      case .shutdown:
        return self.eventLoop.makeFailedFuture(GRPCStatus(code: .unavailable, message: nil))

      case .idle:
        // The idle state was dealt with above.
        self.invalidState()
      }
    }
  }
  /// Shuts down the manager at the request of the application, closing any channel it has.
  ///
  /// - Returns: The close future of the channel if it was ready, the future from the earlier
  ///   request if the manager was already shut down, or an already-succeeded future otherwise.
  internal func shutdown() -> EventLoopFuture<Void> {
    return self.eventLoop.flatSubmit {
      // Enters the `shutdown` state and hands back the future the caller should wait on.
      func enterShutdown(closeFuture: EventLoopFuture<Void>) -> EventLoopFuture<Void> {
        self.state = .shutdown(ShutdownState(closeFuture: closeFuture))
        return closeFuture
      }

      switch self.state {
      case let .shutdown(existing):
        // We're already shutdown; nothing to do.
        return existing.closeFuture

      case .idle:
        // We don't have a channel and we don't want one, easy!
        return enterShutdown(closeFuture: self.eventLoop.makeSucceededFuture(()))

      case let .connecting(connecting):
        // We're mid-connection: the application has no 'ready' channel, so the shutdown future
        // is succeeded and any fallout from the connecting channel is handled without the
        // application knowing.
        let closed = enterShutdown(closeFuture: self.eventLoop.makeSucceededFuture(()))
        // Fail the ready channel promise: we're shutting down, so even if the connection is
        // established the application shouldn't have access to the channel.
        connecting.readyChannelPromise.fail(GRPCStatus(code: .unavailable, message: nil))
        // In case we do successfully connect, close immediately.
        connecting.candidate.whenSuccess { channel in
          channel.close(mode: .all, promise: nil)
        }
        return closed

      case let .active(active):
        // There is a channel but the application doesn't know about it yet: treat this like
        // `.connecting`.
        let closed = enterShutdown(closeFuture: self.eventLoop.makeSucceededFuture(()))
        // Fail the ready channel promise: the application shouldn't have access to the channel.
        active.readyChannelPromise.fail(GRPCStatus(code: .unavailable, message: nil))
        // We have a channel, close it.
        active.candidate.close(mode: .all, promise: nil)
        return closed

      case let .ready(ready):
        // The channel is up and running and the application could be using it: close it and
        // return its `closeFuture`.
        let closed = enterShutdown(closeFuture: ready.channel.closeFuture)
        ready.channel.close(mode: .all, promise: nil)
        return closed

      case let .transientFailure(pending):
        // As for `.connecting` and `.active`, the application has no `.ready` channel. Stop the
        // scheduled connection attempt if we can; if the cancellation comes too late then
        // `startConnecting()` will see the `shutdown` state and ignore the request to connect.
        pending.scheduled.cancel()
        let closed = enterShutdown(closeFuture: self.eventLoop.makeSucceededFuture(()))
        // Fail the ready channel promise: the application shouldn't have access to a channel.
        pending.readyChannelPromise.fail(GRPCStatus(code: .unavailable, message: nil))
        return closed
      }
    }
  }
  310. // MARK: - State changes from the channel handler.
  /// The channel we were connecting became `active`. Must be called on the `EventLoop`.
  ///
  /// - Parameter channel: The channel which became active.
  internal func channelActive(channel: Channel) {
    self.eventLoop.preconditionInEventLoop()

    switch self.state {
    case .shutdown:
      // The application called shutdown before the channel became active; close it.
      channel.close(mode: .all, promise: nil)

    case let .connecting(connecting):
      let connected = ConnectedState(from: connecting, candidate: channel)
      self.state = .active(connected)

    case .idle, .active, .ready, .transientFailure:
      self.invalidState()
    }
  }
  /// An established channel (i.e. one which was `active` or `ready`) became inactive; decides
  /// whether to reconnect or shut down. Must be called on the `EventLoop`.
  internal func channelInactive() {
    self.eventLoop.preconditionInEventLoop()

    switch self.state {
    case .idle, .shutdown:
      // Both are fine: a channel is expected to become inactive after we go idle, and there is
      // nothing left to do once we're shutdown.
      ()

    case let .active(connected):
      // The channel was established but never became ready. Should we try again?
      switch connected.reconnect {
      case let .after(delay):
        // Yes, after some time.
        let retry = self.eventLoop.scheduleTask(in: .seconds(timeInterval: delay)) {
          self.startConnecting()
        }
        self.state = .transientFailure(TransientFailureState(from: connected, scheduled: retry))

      case .none:
        // No, shutdown instead.
        let closed: EventLoopFuture<Void> = self.eventLoop.makeSucceededFuture(())
        self.state = .shutdown(ShutdownState(closeFuture: closed))
        connected.readyChannelPromise.fail(GRPCStatus(code: .unavailable, message: nil))
      }

    case let .ready(ready):
      // The channel was ready and working but went away. Only replace it if backoff is
      // configured.
      if ready.configuration.connectionBackoff != nil {
        // Start connecting straight away, but still go via `transientFailure`.
        let retry = self.eventLoop.scheduleTask(in: .nanoseconds(0)) {
          self.startConnecting()
        }
        self.state = .transientFailure(TransientFailureState(from: ready, scheduled: retry))
      } else {
        self.state = .shutdown(ShutdownState(closeFuture: ready.channel.closeFuture))
      }

    case .connecting, .transientFailure:
      self.invalidState()
    }
  }
  /// The channel became ready, i.e. it has seen the initial HTTP/2 SETTINGS frame. Must be called
  /// on the `EventLoop`.
  internal func ready() {
    self.eventLoop.preconditionInEventLoop()

    switch self.state {
    case .shutdown:
      // Nothing to do: we no longer want the channel.
      ()

    case let .active(connected):
      let channel = connected.candidate
      self.state = .ready(ReadyState(from: connected))
      // Hand the channel to everyone who has been waiting for it.
      connected.readyChannelPromise.succeed(channel)

    case .idle, .transientFailure, .connecting, .ready:
      self.invalidState()
    }
  }
  /// The 'ready' channel has no active RPCs so it is being closed for now. Only valid in the
  /// `ready` state. Must be called on the `EventLoop`.
  internal func idle() {
    self.eventLoop.preconditionInEventLoop()

    guard case let .ready(ready) = self.state else {
      self.invalidState()
    }
    self.state = .idle(IdleState(configuration: ready.configuration))
  }
  391. }
  extension ConnectionManager {
    /// A connection attempt failed: we never established a connection. Must be called on the
    /// `EventLoop`.
    ///
    /// If the attempt allows for a reconnect the next attempt is scheduled and the manager moves
    /// to `transientFailure`; otherwise the manager shuts down and the ready-channel promise is
    /// failed with `error`.
    ///
    /// - Parameter error: The error which caused the connection attempt to fail.
    private func connectionFailed(withError error: Error) {
      self.eventLoop.preconditionInEventLoop()

      switch self.state {
      case let .connecting(connecting):
        // Should we reconnect?
        switch connecting.reconnect {
        // No, shutdown.
        case .none:
          // Enter the terminal state *before* failing the promise, as every other transition in
          // this class does: callbacks on the promise's future may run as soon as it is failed
          // and must not observe the stale `connecting` state.
          self.state = .shutdown(ShutdownState(closeFuture: self.eventLoop.makeSucceededFuture(())))
          connecting.readyChannelPromise.fail(error)

        // Yes, after a delay.
        case let .after(delay):
          let scheduled = self.eventLoop.scheduleTask(in: .seconds(timeInterval: delay)) {
            self.startConnecting()
          }
          self.state = .transientFailure(
            TransientFailureState(from: connecting, scheduled: scheduled)
          )
        }

      // The application must have called shutdown while we were trying to establish a connection
      // which was doomed to fail anyway. That's fine, we can ignore this.
      case .shutdown:
        ()

      // We can't fail to connect if we aren't trying.
      case .idle, .active, .ready, .transientFailure:
        self.invalidState()
      }
    }
  }
  extension ConnectionManager {
    /// Start establishing a connection: this is only possible from the `idle` and
    /// `transientFailure` states. If the manager has been shut down the request is ignored.
    /// Must be called on the `EventLoop`.
    private func startConnecting() {
      // The state is read and replaced below, so enforce the documented requirement in the same
      // way as the other state-changing methods do.
      self.eventLoop.preconditionInEventLoop()

      switch self.state {
      case let .idle(state):
        // A fresh attempt: start with a new backoff iterator and a new promise.
        let iterator = state.configuration.connectionBackoff?.makeIterator()
        self.startConnecting(
          configuration: state.configuration,
          backoffIterator: iterator,
          channelPromise: self.eventLoop.makePromise()
        )

      case let .transientFailure(pending):
        // A retry: continue with the iterator and promise of the previous attempt.
        self.startConnecting(
          configuration: pending.configuration,
          backoffIterator: pending.backoffIterator,
          channelPromise: pending.readyChannelPromise
        )

      // We shutdown before a scheduled connection attempt had started.
      case .shutdown:
        ()

      case .connecting, .active, .ready:
        self.invalidState()
      }
    }

    /// Moves to the `connecting` state and submits the work which creates the channel.
    ///
    /// - Parameters:
    ///   - configuration: The configuration to connect with.
    ///   - backoffIterator: Provides the connect timeout for this attempt and the backoff to
    ///     wait before the next one; if it is `nil` or exhausted a failed attempt is not retried.
    ///   - channelPromise: The promise to carry in the `connecting` state for the ready channel.
    private func startConnecting(
      configuration: ClientConnection.Configuration,
      backoffIterator: ConnectionBackoffIterator?,
      channelPromise: EventLoopPromise<Channel>
    ) {
      // Check before any work is done, in particular before the iterator is advanced.
      self.eventLoop.assertInEventLoop()

      let timeoutAndBackoff = backoffIterator?.next()

      // We're already on the event loop: submit the connect so it starts after we've made the
      // state change to `.connecting`.
      let candidate: EventLoopFuture<Channel> = self.eventLoop.flatSubmit {
        let channel = self.makeChannel(
          configuration: configuration,
          connectTimeout: timeoutAndBackoff?.timeout
        )
        channel.whenFailure { error in
          self.connectionFailed(withError: error)
        }
        return channel
      }

      // Should we reconnect if the candidate channel fails?
      let reconnect: Reconnect = timeoutAndBackoff.map { .after($0.backoff) } ?? .none

      let connecting = ConnectingState(
        configuration: configuration,
        backoffIterator: backoffIterator,
        reconnect: reconnect,
        readyChannelPromise: channelPromise,
        candidate: candidate
      )
      self.state = .connecting(connecting)
    }
  }
  extension ConnectionManager {
    /// Stops execution because the calling function was reached in a state it does not support.
    ///
    /// - Parameters:
    ///   - function: The name of the calling function; defaults to the call site.
    ///   - file: The file of the call site.
    ///   - line: The line of the call site.
    private func invalidState(
      function: StaticString = #function,
      file: StaticString = #file,
      line: UInt = #line
    ) -> Never {
      preconditionFailure(
        "Invalid state \(self.state) for \(function)",
        file: file,
        line: line
      )
    }
  }
  extension ConnectionManager {
    /// Builds the bootstrap used to create a gRPC client channel.
    ///
    /// - Parameters:
    ///   - configuration: The configuration the channel pipeline is set up from.
    ///   - connectTimeout: The connect timeout to apply to the bootstrap, if any.
    /// - Returns: The configured bootstrap.
    private func makeBootstrap(
      configuration: ClientConnection.Configuration,
      connectTimeout: TimeInterval?
    ) -> ClientBootstrapProtocol {
      // Without TLS there is no server hostname. With TLS the override wins over the target host.
      let requestedHostname: String? = configuration.tls.flatMap { tls -> String? in
        return tls.hostnameOverride ?? configuration.target.host
      }
      // IP addresses are never passed on as the server hostname.
      let serverHostname: String? = requestedHostname.flatMap { name -> String? in
        return name.isIPAddress ? nil : name
      }

      let bootstrap = PlatformSupport.makeClientBootstrap(group: self.eventLoop, logger: self.logger)
        .channelOption(ChannelOptions.socket(SocketOptionLevel(SOL_SOCKET), SO_REUSEADDR), value: 1)
        .channelOption(ChannelOptions.socket(IPPROTO_TCP, TCP_NODELAY), value: 1)
        .channelInitializer { channel in
          let needsWriteWorkaround = PlatformSupport.requiresZeroLengthWriteWorkaround(
            group: self.eventLoop,
            hasTLS: configuration.tls != nil
          )
          let initialized = channel.configureGRPCClient(
            httpTargetWindowSize: configuration.httpTargetWindowSize,
            tlsConfiguration: configuration.tls?.configuration,
            tlsServerHostname: serverHostname,
            connectionManager: self,
            connectionKeepalive: configuration.connectionKeepalive,
            connectionIdleTimeout: configuration.connectionIdleTimeout,
            errorDelegate: configuration.errorDelegate,
            requiresZeroLengthWriteWorkaround: needsWriteWorkaround,
            logger: self.logger
          )

          // Run the debug initializer, if there is one, once the channel has been configured.
          guard let debugInitializer = configuration.debugChannelInitializer else {
            return initialized
          }
          return initialized.flatMap {
            debugInitializer(channel)
          }
        }

      guard let connectTimeout = connectTimeout else {
        return bootstrap
      }
      return bootstrap.connectTimeout(.seconds(timeInterval: connectTimeout))
    }

    /// Creates a channel: from the testing channel provider if there is one, otherwise by
    /// connecting a bootstrap to the configured target.
    ///
    /// - Parameters:
    ///   - configuration: The configuration to create the channel from.
    ///   - connectTimeout: The connect timeout to apply, if any; not used by a channel provider.
    /// - Returns: A future for the new channel.
    private func makeChannel(
      configuration: ClientConnection.Configuration,
      connectTimeout: TimeInterval?
    ) -> EventLoopFuture<Channel> {
      guard let provider = self.channelProvider else {
        let bootstrap = self.makeBootstrap(
          configuration: configuration,
          connectTimeout: connectTimeout
        )
        return bootstrap.connect(to: configuration.target)
      }
      return provider()
    }
  }