RetryThrottle.swift 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134
  1. /*
  2. * Copyright 2023, gRPC Authors All rights reserved.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. private import Synchronization
  /// A throttle used to rate-limit retries and hedging attempts.
  ///
  /// gRPC prevents servers from being overloaded by retries and hedging by using a token-based
  /// throttling mechanism at the transport level.
  ///
  /// Each client transport maintains a throttle for the server it is connected to and gRPC records
  /// successful and failed RPC attempts. Successful attempts increment the number of tokens
  /// by ``tokenRatio`` and failed attempts decrement the available tokens by one. In the context
  /// of throttling, a failed attempt is one where the server terminates the RPC with a status code
  /// which is retryable or non fatal (as defined by ``RetryPolicy/retryableStatusCodes`` and
  /// ``HedgingPolicy/nonFatalStatusCodes``) or when the client receives a pushback response from
  /// the server.
  ///
  /// See also [gRFC A6: client retries](https://github.com/grpc/proposal/blob/master/A6-client-retries.md).
  @available(macOS 15.0, iOS 18.0, watchOS 11.0, tvOS 18.0, visionOS 2.0, *)
  public final class RetryThrottle: Sendable {
    // Note: only three figures after the decimal point from the original token ratio are used so
    // all computation is done on a scaled number of tokens (tokens * 1000). This allows us to do
    // all computation in integer space.

    /// The number of tokens available, multiplied by 1000.
    ///
    /// Invariant: the stored value always lies in `0...scaledMaxTokens`.
    private let scaledTokensAvailable: Mutex<Int>

    /// The token ratio, multiplied by 1000.
    private let scaledTokenRatio: Int

    /// The maximum number of tokens, multiplied by 1000.
    private let scaledMaxTokens: Int

    /// The retry threshold, multiplied by 1000. If ``scaledTokensAvailable`` is above this then
    /// retries are permitted.
    private let scaledRetryThreshold: Int

    /// Returns the throttling token ratio.
    ///
    /// The number of tokens held by the throttle is incremented by this value for each successful
    /// response. In the context of throttling, a successful response is one which:
    /// - receives metadata from the server, or
    /// - is terminated with a non-retryable or fatal status code.
    ///
    /// If the response is a pushback response then it is not considered to be successful, even if
    /// either of the preceding conditions are met.
    public var tokenRatio: Double {
      Double(self.scaledTokenRatio) / 1000
    }

    /// The maximum number of tokens the throttle may hold.
    public var maxTokens: Int {
      self.scaledMaxTokens / 1000
    }

    /// The number of tokens the throttle currently has.
    ///
    /// If this value is less than or equal to the retry threshold (defined as `maxTokens / 2`)
    /// then RPCs will not be retried and hedging will be disabled.
    public var tokens: Double {
      self.scaledTokensAvailable.withLock {
        Double($0) / 1000
      }
    }

    /// Returns whether retries and hedging are permitted at this time.
    ///
    /// Retries are permitted only while the available tokens are strictly above the retry
    /// threshold.
    public var isRetryPermitted: Bool {
      self.scaledTokensAvailable.withLock {
        $0 > self.scaledRetryThreshold
      }
    }

    /// Create a new throttle.
    ///
    /// The throttle starts full, that is, holding `maxTokens` tokens.
    ///
    /// - Parameters:
    ///   - maxTokens: The maximum number of tokens available. Must be in the range `1...1000`.
    ///   - tokenRatio: The number of tokens to increment the available tokens by for successful
    ///       responses. See the documentation on this type for a description of what counts as a
    ///       successful response. Note that only three decimal places are used from this value;
    ///       any further digits are truncated.
    /// - Precondition: `maxTokens` must be in the range `1...1000`.
    /// - Precondition: `tokenRatio` must be finite, and `tokenRatio * 1000` must fit in an `Int`.
    /// - Precondition: `tokenRatio` must be `>= 0.001`.
    public init(maxTokens: Int, tokenRatio: Double) {
      precondition(
        (1 ... 1000).contains(maxTokens),
        "maxTokens must be in the range 1...1000 (is \(maxTokens))"
      )

      // Truncate towards zero (the same result `Int(_:)` produces for valid input), but convert
      // with `Int(exactly:)` so that NaN, infinite and out-of-range ratios are reported with a
      // message naming the argument rather than a generic conversion trap.
      let truncatedScaledRatio = (tokenRatio * 1000).rounded(.towardZero)
      guard let scaledTokenRatio = Int(exactly: truncatedScaledRatio) else {
        preconditionFailure(
          "tokenRatio must be finite and representable as a scaled Int (is \(tokenRatio))"
        )
      }
      precondition(scaledTokenRatio > 0, "tokenRatio must be >= 0.001 (is \(tokenRatio))")

      let scaledTokens = maxTokens * 1000
      self.scaledMaxTokens = scaledTokens
      self.scaledRetryThreshold = scaledTokens / 2
      self.scaledTokenRatio = scaledTokenRatio
      self.scaledTokensAvailable = Mutex(scaledTokens)
    }

    /// Create a new throttle.
    ///
    /// - Parameter policy: The policy to use to configure the throttle. Its `maxTokens` and
    ///     `tokenRatio` are forwarded to ``init(maxTokens:tokenRatio:)`` and are subject to the
    ///     same preconditions.
    public convenience init(policy: ServiceConfig.RetryThrottling) {
      self.init(maxTokens: policy.maxTokens, tokenRatio: policy.tokenRatio)
    }

    /// Records a success, adding ``tokenRatio`` tokens to the throttle.
    ///
    /// The number of available tokens is capped at ``maxTokens``.
    @usableFromInline
    func recordSuccess() {
      self.scaledTokensAvailable.withLock { value in
        // Add at most the remaining headroom. `value` never exceeds `scaledMaxTokens` so the
        // subtraction cannot overflow, and, unlike a wrapping add followed by `min`, the sum
        // cannot wrap around to a negative token count when the token ratio is extremely large.
        let headroom = self.scaledMaxTokens - value
        value += min(headroom, self.scaledTokenRatio)
      }
    }

    /// Records a failure, removing one token from the throttle.
    ///
    /// The number of available tokens never drops below zero.
    ///
    /// - Returns: Whether retries will now be throttled.
    @usableFromInline
    @discardableResult
    func recordFailure() -> Bool {
      self.scaledTokensAvailable.withLock { value in
        // One token is 1000 scaled units. `value` is never negative so this cannot overflow.
        value = max(0, value - 1000)
        return value <= self.scaledRetryThreshold
      }
    }
  }