GRPCIdleHandlerStateMachine.swift 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644
  1. /*
  2. * Copyright 2020, gRPC Authors All rights reserved.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. import Logging
  17. import NIO
  18. import NIOHTTP2
  19. /// Holds state for the 'GRPCIdleHandler', this isn't really just the idleness of the connection,
  20. /// it also holds state relevant to quiescing the connection as well as logging some HTTP/2 specific
  21. /// information (like stream creation/close events and changes to settings which can be useful when
  22. /// debugging live systems). Much of this information around the connection state is also used to
  23. /// inform the client connection manager since that's strongly tied to various channel and HTTP/2
  24. /// events.
  25. struct GRPCIdleHandlerStateMachine {
  /// Identifies which end of the connection this state machine is tracking.
  enum Role {
    case server, client
  }
  /// The 'operating' state of the connection, which is also the state a new state machine starts
  /// in. The connection is up and will usually be carrying RPCs, although that is not guaranteed.
  /// There may be no active RPCs when:
  ///
  /// - the connection is not yet 'ready' (the first SETTINGS frame has not been seen),
  /// - the stream count has dropped to zero but the idle timeout task has not been scheduled yet,
  /// - the stream count is zero and the connection was configured without an idle timeout.
  fileprivate struct Operating: CanOpenStreams, CanCloseStreams {
    /// Which side of the connection we are.
    var role: Role

    /// How many streams are currently open.
    var openStreams: Int

    /// The ID of the stream most recently initiated by the remote peer.
    var lastPeerInitiatedStreamID: HTTP2StreamID

    /// The upper bound on the number of streams we may operate concurrently.
    var maxConcurrentStreams: Int

    /// Whether a SETTINGS frame has been seen; one is expected after the connection preface
    /// (RFC 7540 § 3.5). A client treats the connection as 'ready' once the first SETTINGS frame
    /// arrives, and no idle timeout is set before this becomes true.
    var hasSeenSettings: Bool

    /// Shared initializer: an operating state built by either public path starts with zero open
    /// streams.
    private init(
      role: Role,
      lastPeerInitiatedStreamID: HTTP2StreamID,
      maxConcurrentStreams: Int,
      hasSeenSettings: Bool
    ) {
      self.role = role
      self.openStreams = 0
      self.lastPeerInitiatedStreamID = lastPeerInitiatedStreamID
      self.maxConcurrentStreams = maxConcurrentStreams
      self.hasSeenSettings = hasSeenSettings
    }

    /// Creates the initial state for a fresh connection: no peer-initiated streams yet and no
    /// SETTINGS frame seen.
    fileprivate init(role: Role) {
      self.init(
        role: role,
        lastPeerInitiatedStreamID: .rootStream,
        // A placeholder limit, used until the peer's SETTINGS tell us otherwise.
        maxConcurrentStreams: 100,
        hasSeenSettings: false
      )
    }

    /// Resumes operating from the waiting-to-idle state, carrying over what that state knew.
    fileprivate init(fromWaitingToIdle state: WaitingToIdle) {
      self.init(
        role: state.role,
        lastPeerInitiatedStreamID: state.lastPeerInitiatedStreamID,
        maxConcurrentStreams: state.maxConcurrentStreams,
        // 'WaitingToIdle' is only ever entered after a SETTINGS frame has been seen.
        hasSeenSettings: true
      )
    }
  }
  /// The state used once the connection is 'ready', has no active RPCs and an idle timeout task
  /// has been scheduled. If that task fires the connection is closed; if a stream is created
  /// first the task is cancelled.
  fileprivate struct WaitingToIdle {
    /// Which side of the connection we are.
    var role: Role

    /// The ID of the stream most recently initiated by the remote peer.
    var lastPeerInitiatedStreamID: HTTP2StreamID

    /// The upper bound on the number of streams we may operate concurrently.
    var maxConcurrentStreams: Int

    /// The scheduled task which idles the connection when it fires.
    var idleTask: Scheduled<Void>

    /// Moves from the operating state into waiting-to-idle, holding on to the scheduled task.
    fileprivate init(fromOperating state: Operating, idleTask: Scheduled<Void>) {
      // This transition is only valid once a SETTINGS frame has been seen.
      assert(state.hasSeenSettings)

      self.idleTask = idleTask
      self.role = state.role
      self.maxConcurrentStreams = state.maxConcurrentStreams
      self.lastPeerInitiatedStreamID = state.lastPeerInitiatedStreamID
    }
  }
  /// The quiescing state, which can only be reached from the operating state. It is entered
  /// either because the remote peer sent a GOAWAY frame or because graceful shutdown was started
  /// locally.
  fileprivate struct Quiescing: TracksOpenStreams, CanCloseStreams {
    /// Which side of the connection we are.
    var role: Role

    /// How many streams are currently open.
    var openStreams: Int

    /// The ID of the stream most recently initiated by the remote peer.
    var lastPeerInitiatedStreamID: HTTP2StreamID

    /// The upper bound on the number of streams we may operate concurrently.
    var maxConcurrentStreams: Int

    /// `true` if the shutdown was started by this peer rather than by the remote.
    var initiatedByUs: Bool

    /// Begins quiescing from the operating state, keeping its stream bookkeeping.
    fileprivate init(fromOperating state: Operating, initiatedByUs: Bool) {
      // A shutdown we didn't start must have been started by the remote peer sending GOAWAY, and
      // by then a SETTINGS frame must already have been seen.
      assert(initiatedByUs || state.hasSeenSettings)

      self.initiatedByUs = initiatedByUs
      self.role = state.role
      self.maxConcurrentStreams = state.maxConcurrentStreams
      self.lastPeerInitiatedStreamID = state.lastPeerInitiatedStreamID
      self.openStreams = state.openStreams
    }
  }
  /// The closing state, entered when any earlier state starts closing the connection. The only
  /// state which follows it is 'closed'.
  fileprivate struct Closing {
    /// Which side of the connection we are.
    var role: Role

    /// Whether the client connection manager should be told the connection idled when it closes.
    /// (When it is not, the manager will try to establish a new connection immediately.)
    var shouldIdle: Bool

    /// Shared initializer used by the state-specific initializers below.
    private init(role: Role, shouldIdle: Bool) {
      self.role = role
      self.shouldIdle = shouldIdle
    }

    /// Closes from the operating state; idles only when the first SETTINGS frame has been seen
    /// and no streams are open.
    fileprivate init(fromOperating state: Operating) {
      let isReadyAndUnused = state.hasSeenSettings && !state.hasOpenStreams
      self.init(role: state.role, shouldIdle: isReadyAndUnused)
    }

    /// Closes from the quiescing state; a shutdown which we initiated never idles (we want to
    /// shut down instead).
    fileprivate init(fromQuiescing state: Quiescing) {
      self.init(role: state.role, shouldIdle: !state.initiatedByUs)
    }

    /// Closes from the waiting-to-idle state; idles unless the caller says otherwise.
    fileprivate init(fromWaitingToIdle state: WaitingToIdle, shouldIdle: Bool = true) {
      self.init(role: state.role, shouldIdle: shouldIdle)
    }
  }
  /// Every state the machine can be in, each carrying the data relevant to that state.
  fileprivate enum State {
    /// The connection is up; streams may be opened and closed.
    case operating(Operating)

    /// There are no open streams and an idle timeout task has been scheduled.
    case waitingToIdle(WaitingToIdle)

    /// A graceful shutdown is in progress; open streams are being allowed to finish.
    case quiescing(Quiescing)

    /// Closing the connection has been initiated.
    case closing(Closing)

    /// The connection is closed.
    case closed
  }
  /// The actions the owner of the state machine should carry out after feeding it an event. A
  /// value with nothing set (see `none`) means there is nothing to do.
  struct Operations {
    /// If set, the event to pass on to the connection manager.
    private(set) var connectionManagerEvent: ConnectionManagerEvent? = nil

    /// If set, whether an idle timeout should be scheduled or an existing one cancelled.
    private(set) var idleTask: IdleTask? = nil

    /// If set, a GOAWAY frame should be sent carrying this as the last peer initiated stream ID.
    private(set) var sendGoAwayWithLastPeerInitiatedStreamID: HTTP2StreamID? = nil

    /// `true` if the channel should be closed.
    private(set) var shouldCloseChannel: Bool = false

    /// The empty set of operations; the starting point for every event handler.
    fileprivate static let none = Operations()

    /// Only the state machine builds operations, always starting from `none`.
    private init() {}

    /// Requests that the connection manager be told about `event`.
    fileprivate mutating func notifyConnectionManager(about event: ConnectionManagerEvent) {
      self.connectionManagerEvent = event
    }

    /// Requests that a new idle timeout task be scheduled.
    fileprivate mutating func scheduleIdleTask() {
      self.idleTask = .schedule
    }

    /// Requests that the given idle timeout task be cancelled.
    fileprivate mutating func cancelIdleTask(_ task: Scheduled<Void>) {
      self.idleTask = .cancel(task)
    }

    /// Requests that a GOAWAY frame be sent with the given last peer initiated stream ID.
    fileprivate mutating func sendGoAwayFrame(lastPeerInitiatedStreamID streamID: HTTP2StreamID) {
      self.sendGoAwayWithLastPeerInitiatedStreamID = streamID
    }

    /// Requests that the channel be closed.
    fileprivate mutating func closeChannel() {
      self.shouldCloseChannel = true
    }
  }
  /// The events which the state machine may ask to have forwarded to the 'ConnectionManager'.
  enum ConnectionManagerEvent {
    /// The channel became inactive and the connection should not be treated as having idled.
    case inactive

    /// The channel became inactive and the connection should be treated as having idled.
    case idle

    /// The first SETTINGS frame was seen.
    case ready

    /// A graceful shutdown began while streams were still open.
    case quiescing
  }
  /// What should happen to the idle timeout as a result of an event.
  enum IdleTask {
    /// A new idle timeout task should be scheduled.
    case schedule

    /// The associated, previously scheduled, idle timeout task should be cancelled.
    case cancel(Scheduled<Void>)
  }
  /// The state the machine is currently in.
  private var state: State

  /// The logger used for stream and settings events.
  private let logger: Logger

  /// Creates a state machine for the given role; it starts in the operating state with no open
  /// streams.
  ///
  /// - Parameters:
  ///   - role: Whether we are the client or the server on this connection.
  ///   - logger: The logger to emit HTTP/2 related debug information to.
  init(role: Role, logger: Logger) {
    self.logger = logger
    self.state = .operating(Operating(role: role))
  }
  // MARK: Stream Events

  /// Records that an HTTP/2 stream was created.
  ///
  /// Creating a stream while quiescing is a programmer error and traps. Streams created while
  /// closing or closed are ignored.
  ///
  /// - Parameter streamID: The ID of the stream which was created.
  /// - Returns: The operations to perform: cancelling the idle task if one was pending,
  ///   otherwise nothing.
  mutating func streamCreated(withID streamID: HTTP2StreamID) -> Operations {
    switch self.state {
    case var .operating(operating):
      operating.streamCreated(streamID, logger: self.logger)
      self.state = .operating(operating)
      return .none

    case let .waitingToIdle(waiting):
      // We have a stream again, so go back to operating and drop the pending idle timeout.
      var operating = Operating(fromWaitingToIdle: waiting)
      operating.streamCreated(streamID, logger: self.logger)
      self.state = .operating(operating)

      var operations: Operations = .none
      operations.cancelIdleTask(waiting.idleTask)
      return operations

    case .quiescing:
      // No new streams may be created once quiescing has begun.
      preconditionFailure()

    case .closing, .closed:
      return .none
    }
  }
  /// Records that an HTTP/2 stream was closed.
  ///
  /// Closing a stream while waiting to idle is a programmer error and traps, since that state
  /// has no open streams. Streams closed while closing or closed are ignored.
  ///
  /// - Parameter streamID: The ID of the stream which was closed.
  /// - Returns: The operations to perform: scheduling an idle task when the last stream of a
  ///   ready connection closes, or sending GOAWAY and closing the channel when the last stream
  ///   of a quiescing connection closes.
  mutating func streamClosed(withID streamID: HTTP2StreamID) -> Operations {
    switch self.state {
    case var .operating(operating):
      operating.streamClosed(streamID, logger: self.logger)
      self.state = .operating(operating)

      var operations: Operations = .none
      // Only a 'ready' connection with nothing left open starts an idle timeout.
      if operating.hasSeenSettings && !operating.hasOpenStreams {
        operations.scheduleIdleTask()
      }
      return operations

    case .waitingToIdle:
      // Nothing is open in this state, so there is nothing which could be closed.
      preconditionFailure()

    case var .quiescing(quiescing):
      quiescing.streamClosed(streamID, logger: self.logger)

      guard !quiescing.hasOpenStreams else {
        // Still waiting for other streams to finish.
        self.state = .quiescing(quiescing)
        return .none
      }

      // That was the last stream: finish the shutdown.
      self.state = .closing(Closing(fromQuiescing: quiescing))
      var operations: Operations = .none
      operations.closeChannel()
      operations.sendGoAwayFrame(lastPeerInitiatedStreamID: quiescing.lastPeerInitiatedStreamID)
      return operations

    case .closing, .closed:
      return .none
    }
  }
  // MARK: - Idle Events

  /// Records that a task was scheduled to idle the connection.
  ///
  /// The task is kept only if we are operating with no open streams, in which case we move to
  /// the waiting-to-idle state. Being called while already waiting to idle is a programmer error
  /// and traps.
  ///
  /// - Parameter task: The idle timeout task which was scheduled.
  /// - Returns: The operations to perform: cancelling `task` if it is no longer wanted,
  ///   otherwise nothing.
  mutating func scheduledIdleTimeoutTask(_ task: Scheduled<Void>) -> Operations {
    switch self.state {
    case let .operating(operating) where !operating.hasOpenStreams:
      self.state = .waitingToIdle(WaitingToIdle(fromOperating: operating, idleTask: task))
      return .none

    case .waitingToIdle:
      // An idle task is already being held.
      preconditionFailure()

    case .operating, .quiescing, .closing, .closed:
      // Either streams are open or the connection is on its way out: the task isn't needed.
      var operations: Operations = .none
      operations.cancelIdleTask(task)
      return operations
    }
  }
  /// Records that the idle timeout task fired.
  ///
  /// - Returns: The operations to perform: sending GOAWAY and closing the channel if we were
  ///   waiting to idle, otherwise nothing.
  mutating func idleTimeoutTaskFired() -> Operations {
    switch self.state {
    case let .waitingToIdle(waiting):
      self.state = .closing(Closing(fromWaitingToIdle: waiting))

      var operations: Operations = .none
      operations.closeChannel()
      operations.sendGoAwayFrame(lastPeerInitiatedStreamID: waiting.lastPeerInitiatedStreamID)
      return operations

    case .operating, .quiescing, .closing, .closed:
      // Streams are in use, streams are draining, or the connection is already going away; in
      // none of these cases does the connection need to be idled.
      return .none
    }
  }
  // MARK: - Shutdown Events

  /// Closes the connection immediately. One cause is a keepalive timeout (the remote peer has
  /// stopped responding), in which case this connection is discarded.
  ///
  /// - Returns: The operations to perform: closing the channel and, when coming from the
  ///   operating or waiting-to-idle states, sending a GOAWAY frame (plus cancelling any pending
  ///   idle task). Nothing if already closing or closed.
  mutating func shutdownNow() -> Operations {
    switch self.state {
    case let .operating(operating):
      // An immediate shutdown never counts as idling.
      var closing = Closing(fromOperating: operating)
      closing.shouldIdle = false
      self.state = .closing(closing)

      var operations = Operations.none
      operations.sendGoAwayFrame(lastPeerInitiatedStreamID: operating.lastPeerInitiatedStreamID)
      operations.closeChannel()
      return operations

    case let .waitingToIdle(waiting):
      // As above, don't idle; the pending idle timeout is no longer needed either.
      self.state = .closing(Closing(fromWaitingToIdle: waiting, shouldIdle: false))

      var operations = Operations.none
      operations.cancelIdleTask(waiting.idleTask)
      operations.sendGoAwayFrame(lastPeerInitiatedStreamID: waiting.lastPeerInitiatedStreamID)
      operations.closeChannel()
      return operations

    case let .quiescing(quiescing):
      // No GOAWAY frame is emitted from this state; the channel is simply closed.
      self.state = .closing(Closing(fromQuiescing: quiescing))

      var operations = Operations.none
      operations.closeChannel()
      return operations

    case .closing, .closed:
      return .none
    }
  }
  /// Initiate a graceful shutdown of this connection, that is, begin quiescing.
  ///
  /// Whichever state we start from, a connection shut down this way is never reported to the
  /// connection manager as having idled: we asked for a shutdown, not for the connection to be
  /// re-established on demand.
  ///
  /// - Returns: The operations to perform: sending a GOAWAY frame when operating or waiting to
  ///   idle, notifying the connection manager that we're quiescing if streams are still open,
  ///   or closing the channel (and cancelling any idle task) if none are. Nothing if we were
  ///   already quiescing, closing or closed.
  mutating func initiateGracefulShutdown() -> Operations {
    var operations: Operations = .none

    switch self.state {
    case let .operating(state):
      // Send a GOAWAY frame, whether or not streams are still open.
      operations.sendGoAwayFrame(lastPeerInitiatedStreamID: state.lastPeerInitiatedStreamID)

      if state.hasOpenStreams {
        // There are open streams: wait for the stream count to reach zero.
        //
        // It's okay if we haven't seen a SETTINGS frame at this point; we've initiated the
        // shutdown so marking the connection as ready isn't necessary.
        operations.notifyConnectionManager(about: .quiescing)
        self.state = .quiescing(.init(fromOperating: state, initiatedByUs: true))
      } else {
        // No open streams: close the channel now. 'Closing(fromOperating:)' would idle a ready
        // connection with no streams, but we initiated this shutdown so, as in every other
        // branch of this function, we must not idle.
        var closing = Closing(fromOperating: state)
        closing.shouldIdle = false
        self.state = .closing(closing)
        operations.closeChannel()
      }

    case let .waitingToIdle(state):
      // There can't be any open streams, but we have a few loose ends to clear up: we need to
      // cancel the idle timeout, send a GOAWAY frame and then close. We don't want to idle from
      // the closing state: we want to shutdown instead.
      self.state = .closing(.init(fromWaitingToIdle: state, shouldIdle: false))
      operations.cancelIdleTask(state.idleTask)
      operations.sendGoAwayFrame(lastPeerInitiatedStreamID: state.lastPeerInitiatedStreamID)
      operations.closeChannel()

    case var .quiescing(state):
      // We're already quiescing: either the remote initiated it or we're initiating it more than
      // once. Set ourselves as the initiator to ensure we don't idle when we eventually close;
      // this is important for the client: if the server initiated this then we establish a new
      // connection when we close, unless we also initiated shutdown.
      state.initiatedByUs = true
      self.state = .quiescing(state)

    case var .closing(state):
      // We've already called 'close()', make sure we don't go idle.
      state.shouldIdle = false
      self.state = .closing(state)

    case .closed:
      ()
    }

    return operations
  }
  /// Records that a GOAWAY frame arrived from the remote peer: either the peer wants to close
  /// the connection, or it is responding to a shutdown which we started.
  ///
  /// - Returns: The operations to perform: notifying the connection manager that we're
  ///   quiescing if streams are still open; otherwise sending our own GOAWAY and closing the
  ///   channel (cancelling any pending idle task). Nothing if already quiescing, closing or
  ///   closed.
  mutating func receiveGoAway() -> Operations {
    switch self.state {
    case let .operating(operating):
      // The connection preface MUST be followed by a SETTINGS frame. (RFC 7540 § 3.5)
      assert(operating.hasSeenSettings)

      var operations: Operations = .none
      guard operating.hasOpenStreams else {
        // Nothing is in flight, so we can close straight away as well.
        self.state = .closing(Closing(fromOperating: operating))
        operations.sendGoAwayFrame(lastPeerInitiatedStreamID: operating.lastPeerInitiatedStreamID)
        operations.closeChannel()
        return operations
      }

      // Let the open streams run to completion.
      self.state = .quiescing(Quiescing(fromOperating: operating, initiatedByUs: false))
      operations.notifyConnectionManager(about: .quiescing)
      return operations

    case let .waitingToIdle(waiting):
      // No streams can be open here; tidy up by cancelling the idle timeout, sending a GOAWAY
      // frame and closing.
      self.state = .closing(Closing(fromWaitingToIdle: waiting))

      var operations: Operations = .none
      operations.cancelIdleTask(waiting.idleTask)
      operations.sendGoAwayFrame(lastPeerInitiatedStreamID: waiting.lastPeerInitiatedStreamID)
      operations.closeChannel()
      return operations

    case .quiescing:
      // Already quiescing: another GOAWAY changes nothing.
      return .none

    case .closing, .closed:
      // Already closing or closed; ignore the frame.
      return .none
    }
  }
  /// Records that a SETTINGS frame arrived from the remote peer.
  ///
  /// The settings are logged, the concurrent stream limit is updated if the frame carries one,
  /// and, for the first SETTINGS frame seen while operating, the connection is marked as ready.
  /// Settings received while quiescing, closing or closed only get logged.
  ///
  /// - Parameter settings: The settings carried by the frame.
  /// - Returns: The operations to perform: notifying the connection manager that we're ready
  ///   and, if no streams are open, scheduling an idle task; both only for the first SETTINGS
  ///   frame. Otherwise nothing.
  mutating func receiveSettings(_ settings: HTTP2Settings) -> Operations {
    // Log the change in settings. Settings are processed in the order in which they appear, so
    // if a parameter is repeated the last value is the one that takes effect: log that one.
    self.logger.debug(
      "HTTP2 settings update",
      metadata: Dictionary(settings.map {
        ("\($0.parameter.loggingMetadataKey)", "\($0.value)")
      }, uniquingKeysWith: { _, latest in latest })
    )

    // The new limit on concurrent streams, if this frame carries one.
    let newMaxConcurrentStreams = settings.last(where: {
      $0.parameter == .maxConcurrentStreams
    })?.value

    var operations: Operations = .none

    switch self.state {
    case var .operating(state):
      // If we hadn't previously seen settings then we need to notify the client connection
      // manager that we're now ready.
      if !state.hasSeenSettings {
        operations.notifyConnectionManager(about: .ready)
        state.hasSeenSettings = true

        // Now that we know the connection is ready, we may want to start an idle timeout as well.
        if !state.hasOpenStreams {
          operations.scheduleIdleTask()
        }
      }

      if let maxStreams = newMaxConcurrentStreams {
        state.maxConcurrentStreams = maxStreams
      }
      self.state = .operating(state)

    case var .waitingToIdle(state):
      if let maxStreams = newMaxConcurrentStreams {
        state.maxConcurrentStreams = maxStreams
      }
      self.state = .waitingToIdle(state)

    case .quiescing, .closing, .closed:
      ()
    }

    return operations
  }
  // MARK: - Channel Events

  // (Other channel events aren't included here as they don't impact the state machine.)

  /// 'channelInactive' was called in the idle handler holding this state machine.
  ///
  /// Unless it is already closed, the state machine becomes closed and the connection manager
  /// is told whether the connection idled or simply went away.
  ///
  /// - Returns: The operations to perform: a connection manager notification (`.idle` or
  ///   `.inactive`) and, if one was pending, cancellation of the idle task. Nothing if already
  ///   closed.
  mutating func channelInactive() -> Operations {
    let event: ConnectionManagerEvent
    var pendingIdleTask: Scheduled<Void>? = nil

    switch self.state {
    case let .operating(operating):
      // The channel went away unexpectedly. This only counts as idling if the connection was
      // ready (settings seen) and had nothing open.
      let wasReadyAndUnused = operating.hasSeenSettings && !operating.hasOpenStreams
      event = wasReadyAndUnused ? .idle : .inactive

    case let .waitingToIdle(waiting):
      // Idling was about to happen regardless.
      event = .idle
      pendingIdleTask = waiting.idleTask

    case let .quiescing(quiescing):
      let wentAway = quiescing.initiatedByUs || quiescing.hasOpenStreams
      event = wentAway ? .inactive : .idle

    case let .closing(closing):
      event = closing.shouldIdle ? .idle : .inactive

    case .closed:
      return .none
    }

    self.state = .closed

    var operations: Operations = .none
    operations.notifyConnectionManager(about: event)
    if let task = pendingIdleTask {
      operations.cancelIdleTask(task)
    }
    return operations
  }
  464. }
  // MARK: - Helper Protocols

  /// Adopted by states which keep a count of open streams.
  private protocol TracksOpenStreams {
    /// How many streams are currently open.
    var openStreams: Int { get set }
  }
  extension TracksOpenStreams {
    /// `true` when the open stream count is anything other than zero.
    fileprivate var hasOpenStreams: Bool {
      return !(self.openStreams == 0)
    }
  }
  /// Adopted by states in which new streams may be created.
  private protocol CanOpenStreams: TracksOpenStreams {
    /// Which side of the connection this peer is.
    var role: GRPCIdleHandlerStateMachine.Role { get }

    /// The upper bound on the number of concurrent streams.
    var maxConcurrentStreams: Int { get set }

    /// The ID of the stream most recently initiated by the remote peer.
    var lastPeerInitiatedStreamID: HTTP2StreamID { get set }

    /// Notes that a stream with the given ID has been created.
    mutating func streamCreated(_ streamID: HTTP2StreamID, logger: Logger)
  }
  extension CanOpenStreams {
    /// Counts the new stream, remembers its ID if the remote peer initiated it, and logs the
    /// event (with a warning when the concurrent stream limit has just been reached).
    ///
    /// - Parameters:
    ///   - streamID: The ID of the stream which was created.
    ///   - logger: The logger to emit the events to.
    fileprivate mutating func streamCreated(_ streamID: HTTP2StreamID, logger: Logger) {
      self.openStreams += 1

      // The peer of a client is the server, and vice versa.
      let initiatedByPeer: Bool
      switch self.role {
      case .client:
        initiatedByPeer = streamID.isServerInitiated
      case .server:
        initiatedByPeer = streamID.isClientInitiated
      }

      if initiatedByPeer {
        self.lastPeerInitiatedStreamID = streamID
      }

      let activeStreams = self.openStreams
      logger.debug("HTTP2 stream created", metadata: [
        MetadataKey.h2StreamID: "\(streamID)",
        MetadataKey.h2ActiveStreams: "\(activeStreams)",
      ])

      guard activeStreams == self.maxConcurrentStreams else {
        return
      }

      logger.warning("HTTP2 max concurrent stream limit reached", metadata: [
        MetadataKey.h2ActiveStreams: "\(activeStreams)",
      ])
    }
  }
  /// Adopted by states in which open streams may be closed.
  private protocol CanCloseStreams: TracksOpenStreams {
    /// Notes that the stream with the given ID has closed.
    mutating func streamClosed(_ streamID: HTTP2StreamID, logger: Logger)
  }
  extension CanCloseStreams {
    /// Removes the stream from the open stream count and logs the event.
    ///
    /// - Parameters:
    ///   - streamID: The ID of the stream which was closed.
    ///   - logger: The logger to emit the event to.
    fileprivate mutating func streamClosed(_ streamID: HTTP2StreamID, logger: Logger) {
      self.openStreams -= 1

      let remainingStreams = self.openStreams
      logger.debug("HTTP2 stream closed", metadata: [
        MetadataKey.h2StreamID: "\(streamID)",
        MetadataKey.h2ActiveStreams: "\(remainingStreams)",
      ])
    }
  }
  519. }