GRPCIdleHandlerStateMachine.swift 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638
  1. /*
  2. * Copyright 2020, gRPC Authors All rights reserved.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. import Logging
  17. import NIO
  18. import NIOHTTP2
  19. /// Holds state for the 'GRPCIdleHandler', this isn't really just the idleness of the connection,
  20. /// it also holds state relevant to quiescing the connection as well as logging some HTTP/2 specific
  21. /// information (like stream creation/close events and changes to settings which can be useful when
  22. /// debugging live systems). Much of this information around the connection state is also used to
  23. /// inform the client connection manager since that's strongly tied to various channel and HTTP/2
  24. /// events.
  25. struct GRPCIdleHandlerStateMachine {
  /// Which side of the connection we are.
  enum Role {
    case server, client
  }
  /// The 'operating' state: the state we expect to spend most of our time in. The connection is up
  /// and RPCs are expected to be in flight, although that is not required. There may be no active
  /// RPCs when:
  ///
  /// 1. the connection is not yet 'ready' (the first SETTINGS frame has not been seen),
  /// 2. the number of active streams has dropped to zero but the idle timeout task has not been
  ///    scheduled yet, or
  /// 3. there are no active streams and the connection was configured without an idle timeout.
  fileprivate struct Operating: CanOpenStreams, CanCloseStreams {
    /// Our role in the connection.
    var role: Role

    /// The number of open streams.
    var openStreams: Int

    /// The ID of the last stream initiated by the remote peer.
    var lastPeerInitiatedStreamID: HTTP2StreamID

    /// The maximum number of concurrent streams we are allowed to operate.
    var maxConcurrentStreams: Int

    /// Whether a SETTINGS frame has been seen. One is expected after the connection preface
    /// (RFC 7540 § 3.5). This mostly matters to the client, which considers a connection 'ready'
    /// once the first SETTINGS frame has arrived. No idle timeout is set until this is `true`.
    var hasSeenSettings: Bool

    /// Creates the initial state for a connection in which we play the given role: no open
    /// streams and no SETTINGS frame seen yet.
    fileprivate init(role: Role) {
      self.init(
        role: role,
        lastPeerInitiatedStreamID: .rootStream,
        // Assumed until we know better.
        maxConcurrentStreams: 100,
        hasSeenSettings: false
      )
    }

    /// Returns to operating from the waiting-to-idle state, carrying over the connection details.
    fileprivate init(fromWaitingToIdle state: WaitingToIdle) {
      self.init(
        role: state.role,
        lastPeerInitiatedStreamID: state.lastPeerInitiatedStreamID,
        maxConcurrentStreams: state.maxConcurrentStreams,
        // 'WaitingToIdle' is never entered unless a SETTINGS frame has been seen.
        hasSeenSettings: true
      )
    }

    /// Shared initializer: every way of entering this state starts with zero open streams.
    private init(
      role: Role,
      lastPeerInitiatedStreamID: HTTP2StreamID,
      maxConcurrentStreams: Int,
      hasSeenSettings: Bool
    ) {
      self.role = role
      self.openStreams = 0
      self.lastPeerInitiatedStreamID = lastPeerInitiatedStreamID
      self.maxConcurrentStreams = maxConcurrentStreams
      self.hasSeenSettings = hasSeenSettings
    }
  }
  /// The waiting-to-idle state: the connection is 'ready', has no active RPCs and an idle timeout
  /// task has been scheduled. The connection is closed when that task fires; the task is cancelled
  /// if a stream is created first.
  fileprivate struct WaitingToIdle {
    /// Our role in the connection.
    var role: Role

    /// The ID of the last stream initiated by the remote peer.
    var lastPeerInitiatedStreamID: HTTP2StreamID

    /// The maximum number of concurrent streams we are allowed to operate.
    var maxConcurrentStreams: Int

    /// The scheduled task which idles the connection when it fires.
    var idleTask: Scheduled<Void>

    /// Enters waiting-to-idle from the operating state with the idle task that was scheduled.
    fileprivate init(fromOperating state: Operating, idleTask: Scheduled<Void>) {
      // This state is only entered once a SETTINGS frame has been seen.
      assert(state.hasSeenSettings)

      self.idleTask = idleTask
      self.role = state.role
      self.maxConcurrentStreams = state.maxConcurrentStreams
      self.lastPeerInitiatedStreamID = state.lastPeerInitiatedStreamID
    }
  }
  /// The quiescing state, which is only entered from the operating state: either the remote peer
  /// started quiescing by sending a GOAWAY frame, or we started a graceful shutdown locally.
  fileprivate struct Quiescing: TracksOpenStreams, CanCloseStreams {
    /// Our role in the connection.
    var role: Role

    /// The number of open streams.
    var openStreams: Int

    /// The maximum number of concurrent streams we are allowed to operate.
    var maxConcurrentStreams: Int

    /// Whether it was this peer which initiated the shutdown.
    var initiatedByUs: Bool

    /// Enters quiescing from the operating state, keeping its stream count.
    fileprivate init(fromOperating state: Operating, initiatedByUs: Bool) {
      // When the remote peer initiated the shutdown it did so with a GOAWAY frame, so a SETTINGS
      // frame must already have been seen.
      assert(initiatedByUs || state.hasSeenSettings)

      self.initiatedByUs = initiatedByUs
      self.role = state.role
      self.maxConcurrentStreams = state.maxConcurrentStreams
      self.openStreams = state.openStreams
    }
  }
  /// The closing state: one of the earlier states has initiated closing the connection. The only
  /// transition out of this state is to closed.
  fileprivate struct Closing {
    /// Our role in the connection.
    var role: Role

    /// Whether the client connection manager should be told we went idle when we close. (If not,
    /// it will attempt to establish a new connection immediately.)
    var shouldIdle: Bool

    /// Closing from operating: idle only if the first SETTINGS frame was seen and no streams are
    /// open.
    fileprivate init(fromOperating state: Operating) {
      self.init(role: state.role, shouldIdle: state.hasSeenSettings && !state.hasOpenStreams)
    }

    /// Closing from quiescing: if we initiated the quiescing we want to shut down rather than idle.
    fileprivate init(fromQuiescing state: Quiescing) {
      self.init(role: state.role, shouldIdle: !state.initiatedByUs)
    }

    /// Closing from waiting-to-idle: idles unless the caller says otherwise.
    fileprivate init(fromWaitingToIdle state: WaitingToIdle, shouldIdle: Bool = true) {
      self.init(role: state.role, shouldIdle: shouldIdle)
    }

    /// Shared memberwise initializer used by the transition initializers above.
    private init(role: Role, shouldIdle: Bool) {
      self.role = role
      self.shouldIdle = shouldIdle
    }
  }
  /// The states the connection can be in, each carrying the data relevant to that state.
  fileprivate enum State {
    /// The connection is up; streams may be created and closed.
    case operating(Operating)

    /// No streams are open and an idle timeout task has been scheduled.
    case waitingToIdle(WaitingToIdle)

    /// A shutdown was initiated, by us or by the remote peer, while streams were still open.
    case quiescing(Quiescing)

    /// Closing the connection has been initiated.
    case closing(Closing)

    /// The connection is closed.
    case closed
  }
  /// The operations the caller should perform as a result of interacting with the state machine.
  /// Every field starts out as "do nothing" and is only changed through the mutators below.
  struct Operations {
    /// An event the connection manager should be notified about, if any.
    private(set) var connectionManagerEvent: ConnectionManagerEvent? = nil

    /// An idle timeout to schedule or cancel, if any.
    private(set) var idleTask: IdleTask? = nil

    /// If set, send a GOAWAY frame whose last peer initiated stream ID is this value.
    private(set) var sendGoAwayWithLastPeerInitiatedStreamID: HTTP2StreamID? = nil

    /// Whether the channel should be closed.
    private(set) var shouldCloseChannel: Bool = false

    /// No operations at all; the starting point for every state machine call.
    fileprivate static let none = Operations()

    private init() {}

    /// Requests that the connection manager is notified about `event`.
    fileprivate mutating func notifyConnectionManager(about event: ConnectionManagerEvent) {
      self.connectionManagerEvent = event
    }

    /// Requests that an idle timeout task is scheduled.
    fileprivate mutating func scheduleIdleTask() {
      self.idleTask = .schedule
    }

    /// Requests that the given idle timeout task is cancelled.
    fileprivate mutating func cancelIdleTask(_ task: Scheduled<Void>) {
      self.idleTask = .cancel(task)
    }

    /// Requests that a GOAWAY frame carrying `streamID` is sent.
    fileprivate mutating func sendGoAwayFrame(lastPeerInitiatedStreamID streamID: HTTP2StreamID) {
      self.sendGoAwayWithLastPeerInitiatedStreamID = streamID
    }

    /// Requests that the channel is closed.
    fileprivate mutating func closeChannel() {
      self.shouldCloseChannel = true
    }
  }
  /// The events the 'ConnectionManager' may be notified about.
  enum ConnectionManagerEvent {
    /// The channel became inactive and should not be treated as having idled.
    case inactive

    /// The channel became inactive and should be treated as having idled.
    case idle

    /// The first SETTINGS frame has been seen.
    case ready
  }
  /// What to do about the idle timeout.
  enum IdleTask {
    /// Schedule a new idle timeout task.
    case schedule

    /// Cancel the given, previously scheduled, idle timeout task.
    case cancel(Scheduled<Void>)
  }
  /// The state the connection is currently in.
  private var state: State

  /// The logger passed to the per-state helpers which log stream events, and used to log settings.
  private let logger: Logger

  /// Creates a state machine which starts in the operating state.
  ///
  /// - Parameters:
  ///   - role: Our role in the connection.
  ///   - logger: The logger to emit stream and settings events to.
  init(role: Role, logger: Logger) {
    self.logger = logger
    self.state = .operating(Operating(role: role))
  }
  197. // MARK: Stream Events
  /// An HTTP/2 stream was created.
  ///
  /// - Parameter streamID: The ID of the stream which was created.
  /// - Returns: The operations to perform. At most this cancels a pending idle timeout task (when
  ///   the stream was created while we were waiting to idle).
  mutating func streamCreated(withID streamID: HTTP2StreamID) -> Operations {
    var operations: Operations = .none

    switch self.state {
    case var .operating(state):
      // Create the stream.
      state.streamCreated(streamID, logger: self.logger)
      self.state = .operating(state)

    case let .waitingToIdle(state):
      // We're no longer idle: go back to operating and cancel the pending idle timeout.
      var operating = Operating(fromWaitingToIdle: state)
      operating.streamCreated(streamID, logger: self.logger)
      self.state = .operating(operating)
      operations.cancelIdleTask(state.idleTask)

    case var .quiescing(state):
      // A stream can legitimately be created while quiescing: sending a GOAWAY frame doesn't stop
      // the sender from opening streams, and stream creation can race with a GOAWAY frame
      // (RFC 7540 § 6.8). Trapping here would crash on a reachable event, so track the stream
      // instead; the channel is then only closed once this stream has closed too.
      state.openStreams += 1
      self.state = .quiescing(state)

    case .closing, .closed:
      // The connection is going away; there's nothing left to track.
      ()
    }

    return operations
  }
  /// An HTTP/2 stream was closed.
  ///
  /// - Parameter streamID: The ID of the stream which closed.
  /// - Returns: The operations to perform: scheduling an idle timeout if no streams remain open on
  ///   an operating connection which has seen SETTINGS, or closing the channel if the last stream
  ///   closed while quiescing.
  mutating func streamClosed(withID streamID: HTTP2StreamID) -> Operations {
    var operations: Operations = .none

    switch self.state {
    case var .operating(operating):
      operating.streamClosed(streamID, logger: self.logger)
      self.state = .operating(operating)

      // An idle timeout is only started once SETTINGS has been seen and nothing is open.
      if operating.hasSeenSettings && !operating.hasOpenStreams {
        operations.scheduleIdleTask()
      }

    case var .quiescing(quiescing):
      quiescing.streamClosed(streamID, logger: self.logger)

      if quiescing.hasOpenStreams {
        // Still waiting for the remaining streams to finish.
        self.state = .quiescing(quiescing)
      } else {
        // That was the last stream: we can close now.
        self.state = .closing(Closing(fromQuiescing: quiescing))
        operations.closeChannel()
      }

    case .waitingToIdle:
      // If we're waiting to idle then there can't be any streams open which can be closed.
      preconditionFailure()

    case .closing, .closed:
      ()
    }

    return operations
  }
  245. // MARK: - Idle Events
  /// An idle timeout task was scheduled.
  ///
  /// - Parameter task: The task which was scheduled to idle the connection.
  /// - Returns: The operations to perform: `task` is cancelled again unless we're operating with
  ///   no open streams, in which case we hold on to it and start waiting to idle.
  mutating func scheduledIdleTimeoutTask(_ task: Scheduled<Void>) -> Operations {
    var operations: Operations = .none

    switch self.state {
    case let .operating(operating) where !operating.hasOpenStreams:
      // Nothing is in flight: keep the task and wait for it to fire.
      self.state = .waitingToIdle(WaitingToIdle(fromOperating: operating, idleTask: task))

    case .waitingToIdle:
      // There's already an idle task.
      preconditionFailure()

    case .operating, .quiescing, .closing, .closed:
      // Either streams are open or the connection is on its way out: the task isn't needed.
      operations.cancelIdleTask(task)
    }

    return operations
  }
  /// The idle timeout task fired: the connection should be idled.
  ///
  /// - Returns: The operations to perform: when we were waiting to idle, send a GOAWAY frame and
  ///   close the channel; otherwise nothing.
  mutating func idleTimeoutTaskFired() -> Operations {
    var operations: Operations = .none

    switch self.state {
    case .operating, .quiescing, .closing, .closed:
      // We're either operating on streams, streams are going away, or the connection is going away
      // so we don't need to idle the connection.
      ()

    case let .waitingToIdle(waiting):
      operations.closeChannel()
      operations.sendGoAwayFrame(lastPeerInitiatedStreamID: waiting.lastPeerInitiatedStreamID)
      self.state = .closing(Closing(fromWaitingToIdle: waiting))
    }

    return operations
  }
  279. // MARK: - Shutdown Events
  /// Close the connection immediately. This can happen as a result of a keepalive timeout (i.e.
  /// the server has become unresponsive), in which case the connection is discarded.
  ///
  /// - Returns: The operations to perform: closing the channel from any state which isn't already
  ///   closing or closed, plus sending a GOAWAY frame and cancelling the idle task as required.
  mutating func shutdownNow() -> Operations {
    var operations = Operations.none

    switch self.state {
    case let .operating(operating):
      // Whatever the stream count says, we must not idle after an immediate shutdown.
      var closing = Closing(fromOperating: operating)
      closing.shouldIdle = false
      self.state = .closing(closing)

      operations.sendGoAwayFrame(lastPeerInitiatedStreamID: operating.lastPeerInitiatedStreamID)
      operations.closeChannel()

    case let .waitingToIdle(waiting):
      // Don't idle.
      self.state = .closing(Closing(fromWaitingToIdle: waiting, shouldIdle: false))

      operations.cancelIdleTask(waiting.idleTask)
      operations.sendGoAwayFrame(lastPeerInitiatedStreamID: waiting.lastPeerInitiatedStreamID)
      operations.closeChannel()

    case let .quiescing(quiescing):
      // We've already sent a GOAWAY frame if we're in this state, just close.
      self.state = .closing(Closing(fromQuiescing: quiescing))
      operations.closeChannel()

    case .closing, .closed:
      ()
    }

    return operations
  }
  /// Initiate a graceful shutdown of this connection, that is, begin quiescing.
  ///
  /// - Returns: The operations to perform: a GOAWAY frame is sent when shutting down from the
  ///   operating or waiting-to-idle states, and the channel is closed straight away if no streams
  ///   are open.
  mutating func initiateGracefulShutdown() -> Operations {
    var operations: Operations = .none

    switch self.state {
    case let .operating(operating):
      // A GOAWAY frame is sent whether or not there are open streams.
      operations.sendGoAwayFrame(lastPeerInitiatedStreamID: operating.lastPeerInitiatedStreamID)

      if operating.hasOpenStreams {
        // Wait for the stream count to reach zero before closing.
        //
        // It's okay if we haven't seen a SETTINGS frame at this point: we initiated the shutdown,
        // so the connection doesn't need to become ready.
        self.state = .quiescing(Quiescing(fromOperating: operating, initiatedByUs: true))
      } else {
        // No open streams: close the channel as well.
        self.state = .closing(Closing(fromOperating: operating))
        operations.closeChannel()
      }

    case let .waitingToIdle(waiting):
      // There can't be any open streams, but there are loose ends to tidy up: cancel the idle
      // timeout, send a GOAWAY frame and then close. We don't want to idle from the closing state:
      // we want to shut down instead.
      operations.cancelIdleTask(waiting.idleTask)
      operations.sendGoAwayFrame(lastPeerInitiatedStreamID: waiting.lastPeerInitiatedStreamID)
      operations.closeChannel()
      self.state = .closing(Closing(fromWaitingToIdle: waiting, shouldIdle: false))

    case var .quiescing(quiescing):
      // We're already quiescing: either the remote initiated it or we're initiating it more than
      // once. Record ourselves as the initiator so that we don't idle when we eventually close.
      // This matters for the client: if the server initiated this then we establish a new
      // connection when we close, unless we also initiated shutdown.
      quiescing.initiatedByUs = true
      self.state = .quiescing(quiescing)

    case var .closing(closing):
      // We've already called 'close()', make sure we don't go idle.
      closing.shouldIdle = false
      self.state = .closing(closing)

    case .closed:
      ()
    }

    return operations
  }
  /// A GOAWAY frame was received from the remote peer: either the remote peer wants to close the
  /// connection or it is responding to us shutting the connection down.
  ///
  /// - Returns: The operations to perform: from the operating and waiting-to-idle states a GOAWAY
  ///   frame is sent in response and, if no streams are open, the channel is closed.
  mutating func receiveGoAway() -> Operations {
    var operations: Operations = .none

    switch self.state {
    case let .operating(operating):
      // A SETTINGS frame MUST follow the connection preface. (RFC 7540 § 3.5)
      assert(operating.hasSeenSettings)

      // Respond with a GOAWAY frame of our own.
      operations.sendGoAwayFrame(lastPeerInitiatedStreamID: operating.lastPeerInitiatedStreamID)

      if operating.hasOpenStreams {
        // Let the open streams finish first.
        self.state = .quiescing(Quiescing(fromOperating: operating, initiatedByUs: false))
      } else {
        // No open streams, we can close as well.
        self.state = .closing(Closing(fromOperating: operating))
        operations.closeChannel()
      }

    case let .waitingToIdle(waiting):
      // There can't be any open streams, but there are loose ends to tidy up: cancel the idle
      // timeout, send a GOAWAY frame and then close.
      operations.cancelIdleTask(waiting.idleTask)
      operations.sendGoAwayFrame(lastPeerInitiatedStreamID: waiting.lastPeerInitiatedStreamID)
      operations.closeChannel()
      self.state = .closing(Closing(fromWaitingToIdle: waiting))

    case .quiescing, .closing, .closed:
      // Already quiescing: this changes nothing. Already closing or closed: we must have emitted a
      // GOAWAY frame already, so ignore this.
      ()
    }

    return operations
  }
  /// A SETTINGS frame was received.
  ///
  /// - Parameter settings: The settings which were received; they are logged at debug level.
  /// - Returns: The operations to perform: on the first SETTINGS frame seen while operating the
  ///   connection manager is told we're ready and, if no streams are open, an idle timeout is
  ///   scheduled.
  mutating func receiveSettings(_ settings: HTTP2Settings) -> Operations {
    // Log the change in settings.
    self.logger.debug(
      "HTTP2 settings update",
      metadata: Dictionary(settings.map {
        ("\($0.parameter.loggingMetadataKey)", "\($0.value)")
      }, uniquingKeysWith: { first, _ in first })
    )

    // If max concurrent streams appears more than once, the last value is the one that is used.
    let newMaxConcurrentStreams = settings.last(where: {
      $0.parameter == .maxConcurrentStreams
    })?.value

    var operations: Operations = .none

    switch self.state {
    case var .operating(operating):
      // The first SETTINGS frame makes the connection ready: tell the client connection manager.
      if !operating.hasSeenSettings {
        operating.hasSeenSettings = true
        operations.notifyConnectionManager(about: .ready)

        // Now that the connection is ready we may want to start an idle timeout as well.
        if !operating.hasOpenStreams {
          operations.scheduleIdleTask()
        }
      }

      if let maxStreams = newMaxConcurrentStreams {
        operating.maxConcurrentStreams = maxStreams
      }
      self.state = .operating(operating)

    case var .waitingToIdle(waiting):
      if let maxStreams = newMaxConcurrentStreams {
        waiting.maxConcurrentStreams = maxStreams
      }
      self.state = .waitingToIdle(waiting)

    case .quiescing, .closing, .closed:
      ()
    }

    return operations
  }
  418. // MARK: - Channel Events
  419. // (Other channel events aren't included here as they don't impact the state machine.)
  /// 'channelInactive' was called in the idle handler holding this state machine.
  ///
  /// - Returns: The operations to perform: unless we were already closed, the connection manager
  ///   is notified that we either idled or became inactive, and any pending idle task is cancelled.
  mutating func channelInactive() -> Operations {
    var operations: Operations = .none

    switch self.state {
    case let .operating(operating):
      // We unexpectedly became inactive. This only counts as idling if we had seen settings and
      // there were no open streams.
      let wasIdle = operating.hasSeenSettings && !operating.hasOpenStreams
      operations.notifyConnectionManager(about: wasIdle ? .idle : .inactive)

    case let .waitingToIdle(waiting):
      // We were going to idle anyway.
      operations.cancelIdleTask(waiting.idleTask)
      operations.notifyConnectionManager(about: .idle)

    case let .quiescing(quiescing):
      // Only idle if the remote peer initiated the shutdown and nothing was left open.
      let wasIdle = !quiescing.initiatedByUs && !quiescing.hasOpenStreams
      operations.notifyConnectionManager(about: wasIdle ? .idle : .inactive)

    case let .closing(closing):
      operations.notifyConnectionManager(about: closing.shouldIdle ? .idle : .inactive)

    case .closed:
      ()
    }

    // Whichever state we were in, we're closed now.
    self.state = .closed

    return operations
  }
  458. }
  459. // MARK: - Helper Protocols
  /// A state which keeps count of the open streams.
  private protocol TracksOpenStreams {
    /// How many streams are currently open.
    var openStreams: Int { get set }
  }

  extension TracksOpenStreams {
    /// `true` if the open stream count is anything other than zero.
    fileprivate var hasOpenStreams: Bool {
      let noStreams = self.openStreams == 0
      return !noStreams
    }
  }
  /// A state in which streams may be opened.
  private protocol CanOpenStreams: TracksOpenStreams {
    /// The role of this peer in the connection.
    var role: GRPCIdleHandlerStateMachine.Role { get }

    /// The ID of the stream most recently initiated by the remote peer.
    var lastPeerInitiatedStreamID: HTTP2StreamID { get set }

    /// The maximum number of concurrent streams.
    var maxConcurrentStreams: Int { get set }

    /// Notes that a stream has been created.
    mutating func streamCreated(_ streamID: HTTP2StreamID, logger: Logger)
  }

  extension CanOpenStreams {
    /// Counts the new stream, remembers its ID if the remote peer initiated it, and logs the
    /// event. A warning is logged when the count reaches the concurrent stream limit.
    fileprivate mutating func streamCreated(_ streamID: HTTP2StreamID, logger: Logger) {
      self.openStreams += 1

      // The peer's streams are the server initiated ones if we're the client, and vice versa.
      let initiatedByPeer: Bool
      switch self.role {
      case .client:
        initiatedByPeer = streamID.isServerInitiated
      case .server:
        initiatedByPeer = streamID.isClientInitiated
      }

      if initiatedByPeer {
        self.lastPeerInitiatedStreamID = streamID
      }

      logger.debug("HTTP2 stream created", metadata: [
        MetadataKey.h2StreamID: "\(streamID)",
        MetadataKey.h2ActiveStreams: "\(self.openStreams)",
      ])

      let limitReached = self.openStreams == self.maxConcurrentStreams
      if limitReached {
        logger.warning("HTTP2 max concurrent stream limit reached", metadata: [
          MetadataKey.h2ActiveStreams: "\(self.openStreams)",
        ])
      }
    }
  }
  /// A state in which streams may be closed.
  private protocol CanCloseStreams: TracksOpenStreams {
    /// Notes that a stream has closed.
    mutating func streamClosed(_ streamID: HTTP2StreamID, logger: Logger)
  }

  extension CanCloseStreams {
    /// Removes the stream from the open stream count and logs the event along with the number of
    /// streams which remain open.
    fileprivate mutating func streamClosed(_ streamID: HTTP2StreamID, logger: Logger) {
      self.openStreams -= 1

      logger.debug("HTTP2 stream closed", metadata: [
        MetadataKey.h2StreamID: "\(streamID)",
        MetadataKey.h2ActiveStreams: "\(self.openStreams)",
      ])
    }
  }