ServerConnectionManagementHandler.swift 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449
  1. /*
  2. * Copyright 2024, gRPC Authors All rights reserved.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. import NIOCore
  17. import NIOHTTP2
  18. /// A `ChannelHandler` which manages the lifecycle of a gRPC connection over HTTP/2.
  19. ///
  20. /// This handler is responsible for managing several aspects of the connection. These include:
  21. /// 1. Handling the graceful close of connections. When gracefully closing a connection the server
  22. /// sends a GOAWAY frame with the last stream ID set to the maximum stream ID allowed followed by
  23. /// a PING frame. On receipt of the PING frame the server sends another GOAWAY frame with the
  24. /// highest ID of all streams which have been opened. After this, the handler closes the
  25. /// connection once all streams are closed.
  26. /// 2. Enforcing that graceful shutdown doesn't exceed a configured limit (if configured).
  27. /// 3. Gracefully closing the connection once it reaches the maximum configured age (if configured).
  28. /// 4. Gracefully closing the connection once it has been idle for a given period of time (if
  29. /// configured).
  30. /// 5. Periodically sending keep alive pings to the client (if configured) and closing the
  31. /// connection if necessary.
  32. /// 6. Policing pings sent by the client to ensure that the client isn't misconfigured to send
  33. /// too many pings.
  34. ///
  35. /// Some of the behaviours are described in:
  36. /// - [gRFC A8](https://github.com/grpc/proposal/blob/master/A8-client-side-keepalive.md), and
  37. /// - [gRFC A9](https://github.com/grpc/proposal/blob/master/A9-server-side-conn-mgt.md).
  38. final class ServerConnectionManagementHandler: ChannelDuplexHandler {
  39. typealias InboundIn = HTTP2Frame
  40. typealias InboundOut = HTTP2Frame
  41. typealias OutboundIn = HTTP2Frame
  42. typealias OutboundOut = HTTP2Frame
  43. /// The `EventLoop` of the `Channel` this handler exists in.
  44. private let eventLoop: EventLoop
  45. /// The maximum amount of time a connection may be idle for. If the connection remains idle
  46. /// (i.e. has no open streams) for this period of time then the connection will be gracefully
  47. /// closed.
  48. private var maxIdleTimer: Timer?
  49. /// The maximum age of a connection. If the connection remains open after this amount of time
  50. /// then it will be gracefully closed.
  51. private var maxAgeTimer: Timer?
  52. /// The maximum amount of time a connection may spend closing gracefully, after which it is
  53. /// closed abruptly. The timer starts after the second GOAWAY frame has been sent.
  54. private var maxGraceTimer: Timer?
  55. /// The amount of time to wait before sending a keep alive ping.
  56. private var keepAliveTimer: Timer?
  57. /// The amount of time the client has to reply after sending a keep alive ping. Only used if
  58. /// `keepAliveTimer` is set.
  59. private var keepAliveTimeoutTimer: Timer
  60. /// Opaque data sent in keep alive pings.
  61. private let keepAlivePingData: HTTP2PingData
  62. /// Whether a flush is pending.
  63. private var flushPending: Bool
  64. /// Whether `channelRead` has been called and `channelReadComplete` hasn't yet been called.
  65. /// Resets once `channelReadComplete` returns.
  66. private var inReadLoop: Bool
  67. /// The current state of the connection.
  68. private var state: StateMachine
  69. /// The clock.
  70. private let clock: Clock
  71. /// A clock providing the current time.
  72. ///
  73. /// This is necessary for testing where a manual clock can be used and advanced from the test.
  74. /// While NIO's `EmbeddedEventLoop` provides control over its view of time (and therefore any
  75. /// events scheduled on it) it doesn't offer a way to get the current time. This is usually done
  76. /// via `NIODeadline`.
  77. enum Clock {
  78. case nio
  79. case manual(Manual)
  80. func now() -> NIODeadline {
  81. switch self {
  82. case .nio:
  83. return .now()
  84. case .manual(let clock):
  85. return clock.time
  86. }
  87. }
  88. final class Manual {
  89. private(set) var time: NIODeadline
  90. init() {
  91. self.time = .uptimeNanoseconds(0)
  92. }
  93. func advance(by amount: TimeAmount) {
  94. self.time = self.time + amount
  95. }
  96. }
  97. }
  98. /// Stats about recently written frames. Used to determine whether to reset keep-alive state.
  99. private var frameStats: FrameStats
  100. struct FrameStats {
  101. private(set) var didWriteHeadersOrData = false
  102. /// Mark that a HEADERS frame has been written.
  103. mutating func wroteHeaders() {
  104. self.didWriteHeadersOrData = true
  105. }
  106. /// Mark that DATA frame has been written.
  107. mutating func wroteData() {
  108. self.didWriteHeadersOrData = true
  109. }
  110. /// Resets the state such that no HEADERS or DATA frames have been written.
  111. mutating func reset() {
  112. self.didWriteHeadersOrData = false
  113. }
  114. }
  115. /// A synchronous view over this handler.
  116. var syncView: SyncView {
  117. return SyncView(self)
  118. }
  119. /// A synchronous view over this handler.
  120. ///
  121. /// Methods on this view *must* be called from the same `EventLoop` as the `Channel` in which
  122. /// this handler exists.
  123. struct SyncView {
  124. private let handler: ServerConnectionManagementHandler
  125. fileprivate init(_ handler: ServerConnectionManagementHandler) {
  126. self.handler = handler
  127. }
  128. /// Notify the handler that the connection has received a flush event.
  129. func connectionWillFlush() {
  130. // The handler can't rely on `flush(context:)` due to its expected position in the pipeline.
  131. // It's expected to be placed after the HTTP/2 handler (i.e. closer to the application) as
  132. // it needs to receive HTTP/2 frames. However, flushes from stream channels aren't sent down
  133. // the entire connection channel, instead they are sent from the point in the channel they
  134. // are multiplexed from (either the HTTP/2 handler or the HTTP/2 multiplexing handler,
  135. // depending on how multiplexing is configured).
  136. self.handler.eventLoop.assertInEventLoop()
  137. if self.handler.frameStats.didWriteHeadersOrData {
  138. self.handler.frameStats.reset()
  139. self.handler.state.resetKeepAliveState()
  140. }
  141. }
  142. /// Notify the handler that a HEADERS frame was written in the last write loop.
  143. func wroteHeadersFrame() {
  144. self.handler.eventLoop.assertInEventLoop()
  145. self.handler.frameStats.wroteHeaders()
  146. }
  147. /// Notify the handler that a DATA frame was written in the last write loop.
  148. func wroteDataFrame() {
  149. self.handler.eventLoop.assertInEventLoop()
  150. self.handler.frameStats.wroteData()
  151. }
  152. }
  153. /// Creates a new handler which manages the lifecycle of a connection.
  154. ///
  155. /// - Parameters:
  156. /// - eventLoop: The `EventLoop` of the `Channel` this handler is placed in.
  157. /// - maxIdleTime: The maximum amount time a connection may be idle for before being closed.
  158. /// - maxAge: The maximum amount of time a connection may exist before being gracefully closed.
  159. /// - maxGraceTime: The maximum amount of time that the connection has to close gracefully.
  160. /// - keepAliveTime: The amount of time to wait after reading data before sending a keep-alive
  161. /// ping.
  162. /// - keepAliveTimeout: The amount of time the client has to reply after the server sends a
  163. /// keep-alive ping to keep the connection open. The connection is closed if no reply
  164. /// is received.
  165. /// - allowKeepAliveWithoutCalls: Whether the server allows the client to send keep-alive pings
  166. /// when there are no calls in progress.
  167. /// - minPingIntervalWithoutCalls: The minimum allowed interval the client is allowed to send
  168. /// keep-alive pings. Pings more frequent than this interval count as 'strikes' and the
  169. /// connection is closed if there are too many strikes.
  170. /// - clock: A clock providing the current time.
  171. init(
  172. eventLoop: EventLoop,
  173. maxIdleTime: TimeAmount?,
  174. maxAge: TimeAmount?,
  175. maxGraceTime: TimeAmount?,
  176. keepAliveTime: TimeAmount?,
  177. keepAliveTimeout: TimeAmount?,
  178. allowKeepAliveWithoutCalls: Bool,
  179. minPingIntervalWithoutCalls: TimeAmount,
  180. clock: Clock = .nio
  181. ) {
  182. self.eventLoop = eventLoop
  183. self.maxIdleTimer = maxIdleTime.map { Timer(delay: $0) }
  184. self.maxAgeTimer = maxAge.map { Timer(delay: $0) }
  185. self.maxGraceTimer = maxGraceTime.map { Timer(delay: $0) }
  186. self.keepAliveTimer = keepAliveTime.map { Timer(delay: $0) }
  187. // Always create a keep alive timeout timer, it's only used if there is a keep alive timer.
  188. self.keepAliveTimeoutTimer = Timer(delay: keepAliveTimeout ?? .seconds(20))
  189. // Generate a random value to be used as keep alive ping data.
  190. let pingData = UInt64.random(in: .min ... .max)
  191. self.keepAlivePingData = HTTP2PingData(withInteger: pingData)
  192. self.state = StateMachine(
  193. allowKeepAliveWithoutCalls: allowKeepAliveWithoutCalls,
  194. minPingReceiveIntervalWithoutCalls: minPingIntervalWithoutCalls,
  195. goAwayPingData: HTTP2PingData(withInteger: ~pingData)
  196. )
  197. self.flushPending = false
  198. self.inReadLoop = false
  199. self.clock = clock
  200. self.frameStats = FrameStats()
  201. }
  202. func handlerAdded(context: ChannelHandlerContext) {
  203. assert(context.eventLoop === self.eventLoop)
  204. }
  205. func channelActive(context: ChannelHandlerContext) {
  206. self.maxAgeTimer?.schedule(on: context.eventLoop) {
  207. self.initiateGracefulShutdown(context: context)
  208. }
  209. self.maxIdleTimer?.schedule(on: context.eventLoop) {
  210. self.initiateGracefulShutdown(context: context)
  211. }
  212. self.keepAliveTimer?.schedule(on: context.eventLoop) {
  213. self.keepAliveTimerFired(context: context)
  214. }
  215. context.fireChannelActive()
  216. }
  217. func channelInactive(context: ChannelHandlerContext) {
  218. self.maxIdleTimer?.cancel()
  219. self.maxAgeTimer?.cancel()
  220. self.maxGraceTimer?.cancel()
  221. self.keepAliveTimer?.cancel()
  222. self.keepAliveTimeoutTimer.cancel()
  223. context.fireChannelInactive()
  224. }
  225. func userInboundEventTriggered(context: ChannelHandlerContext, event: Any) {
  226. switch event {
  227. case let event as NIOHTTP2StreamCreatedEvent:
  228. // The connection isn't idle if a stream is open.
  229. self.maxIdleTimer?.cancel()
  230. self.state.streamOpened(event.streamID)
  231. case let event as StreamClosedEvent:
  232. switch self.state.streamClosed(event.streamID) {
  233. case .startIdleTimer:
  234. self.maxIdleTimer?.schedule(on: context.eventLoop) {
  235. self.initiateGracefulShutdown(context: context)
  236. }
  237. case .close:
  238. context.close(mode: .all, promise: nil)
  239. case .none:
  240. ()
  241. }
  242. default:
  243. ()
  244. }
  245. context.fireUserInboundEventTriggered(event)
  246. }
  247. func channelRead(context: ChannelHandlerContext, data: NIOAny) {
  248. self.inReadLoop = true
  249. // Any read data indicates that the connection is alive so cancel the keep-alive timers.
  250. self.keepAliveTimer?.cancel()
  251. self.keepAliveTimeoutTimer.cancel()
  252. let frame = self.unwrapInboundIn(data)
  253. switch frame.payload {
  254. case .ping(let data, let ack):
  255. if ack {
  256. self.handlePingAck(context: context, data: data)
  257. } else {
  258. self.handlePing(context: context, data: data)
  259. }
  260. default:
  261. () // Only interested in PING frames, ignore the rest.
  262. }
  263. context.fireChannelRead(data)
  264. }
  265. func channelReadComplete(context: ChannelHandlerContext) {
  266. while self.flushPending {
  267. self.flushPending = false
  268. context.flush()
  269. }
  270. self.inReadLoop = false
  271. // Done reading: schedule the keep-alive timer.
  272. self.keepAliveTimer?.schedule(on: context.eventLoop) {
  273. self.keepAliveTimerFired(context: context)
  274. }
  275. context.fireChannelReadComplete()
  276. }
  277. func flush(context: ChannelHandlerContext) {
  278. self.maybeFlush(context: context)
  279. }
  280. }
  281. extension ServerConnectionManagementHandler {
  282. private func maybeFlush(context: ChannelHandlerContext) {
  283. if self.inReadLoop {
  284. self.flushPending = true
  285. } else {
  286. context.flush()
  287. }
  288. }
  289. private func initiateGracefulShutdown(context: ChannelHandlerContext) {
  290. context.eventLoop.assertInEventLoop()
  291. // Cancel any timers if initiating shutdown.
  292. self.maxIdleTimer?.cancel()
  293. self.maxAgeTimer?.cancel()
  294. self.keepAliveTimer?.cancel()
  295. self.keepAliveTimeoutTimer.cancel()
  296. switch self.state.startGracefulShutdown() {
  297. case .sendGoAwayAndPing(let pingData):
  298. // There's a time window between the server sending a GOAWAY frame and the client receiving
  299. // it. During this time the client may open new streams as it doesn't yet know about the
  300. // GOAWAY frame.
  301. //
  302. // The server therefore sends a GOAWAY with the last stream ID set to the maximum stream ID
  303. // and follows it with a PING frame. When the server receives the ack for the PING frame it
  304. // knows that the client has received the initial GOAWAY frame and that no more streams may
  305. // be opened. The server can then send an additional GOAWAY frame with a more representative
  306. // last stream ID.
  307. let goAway = HTTP2Frame(
  308. streamID: .rootStream,
  309. payload: .goAway(
  310. lastStreamID: .maxID,
  311. errorCode: .noError,
  312. opaqueData: nil
  313. )
  314. )
  315. let ping = HTTP2Frame(streamID: .rootStream, payload: .ping(pingData, ack: false))
  316. context.write(self.wrapOutboundOut(goAway), promise: nil)
  317. context.write(self.wrapOutboundOut(ping), promise: nil)
  318. self.maybeFlush(context: context)
  319. case .none:
  320. () // Already shutting down.
  321. }
  322. }
  323. private func handlePing(context: ChannelHandlerContext, data: HTTP2PingData) {
  324. switch self.state.receivedPing(atTime: self.clock.now(), data: data) {
  325. case .enhanceYourCalmThenClose(let streamID):
  326. let goAway = HTTP2Frame(
  327. streamID: .rootStream,
  328. payload: .goAway(
  329. lastStreamID: streamID,
  330. errorCode: .enhanceYourCalm,
  331. opaqueData: context.channel.allocator.buffer(string: "too_many_pings")
  332. )
  333. )
  334. context.write(self.wrapOutboundOut(goAway), promise: nil)
  335. self.maybeFlush(context: context)
  336. context.close(promise: nil)
  337. case .sendAck:
  338. let ping = HTTP2Frame(streamID: .rootStream, payload: .ping(data, ack: true))
  339. context.write(self.wrapOutboundOut(ping), promise: nil)
  340. self.maybeFlush(context: context)
  341. case .none:
  342. ()
  343. }
  344. }
  345. private func handlePingAck(context: ChannelHandlerContext, data: HTTP2PingData) {
  346. switch self.state.receivedPingAck(data: data) {
  347. case .sendGoAway(let streamID, let close):
  348. let goAway = HTTP2Frame(
  349. streamID: .rootStream,
  350. payload: .goAway(lastStreamID: streamID, errorCode: .noError, opaqueData: nil)
  351. )
  352. context.write(self.wrapOutboundOut(goAway), promise: nil)
  353. self.maybeFlush(context: context)
  354. if close {
  355. context.close(promise: nil)
  356. } else {
  357. // RPCs may have a grace period for finishing once the second GOAWAY frame has finished.
  358. // If this is set close the connection abruptly once the grace period passes.
  359. self.maxGraceTimer?.schedule(on: context.eventLoop) {
  360. context.close(promise: nil)
  361. }
  362. }
  363. case .none:
  364. ()
  365. }
  366. }
  367. private func keepAliveTimerFired(context: ChannelHandlerContext) {
  368. let ping = HTTP2Frame(streamID: .rootStream, payload: .ping(self.keepAlivePingData, ack: false))
  369. context.write(self.wrapInboundOut(ping), promise: nil)
  370. self.maybeFlush(context: context)
  371. // Schedule a timeout on waiting for the response.
  372. self.keepAliveTimeoutTimer.schedule(on: context.eventLoop) {
  373. self.initiateGracefulShutdown(context: context)
  374. }
  375. }
  376. }