Browse Source

Idle client connection on 'channelInactive' if there are no active RPCs (#967)

Motivation:

Connections are idled if there are no RPCs for a given timeout. A
connection may also be idled if the remote sends it a GO_AWAY frame.
However, If the remote fails to send a GO_AWAY then from the perspective
of the client, the connection has been dropped and it will attempt
re-establish a connection. As a result a connection may bounce from
'ready' to 'transientFailure' to 'connecting' and back to 'ready' even
though there are no active RPCs.

Modifications:

- Idle the client connection on 'channelInactive' when there are no
  active RPCs and we know that the connection was previously up-and-running.
- Update tests.

Result:

We avoid creating unnecessary connections when the connection is dropped
and there are no active RPCs.
George Barnett 5 years ago
parent
commit
def0b59363

+ 73 - 6
Sources/GRPC/ConnectionManager.swift

@@ -423,7 +423,19 @@ internal class ConnectionManager {
     case .shutdown:
       channel.close(mode: .all, promise: nil)
 
-    case .idle, .active, .ready, .transientFailure:
+    // These cases are purposefully separated: some crash reporting services provide stack traces
+    // which don't include the precondition failure message (which contain the invalid state we were
+    // in). Keeping the cases separate allows us work out the state from the line number.
+    case .idle:
+      self.invalidState()
+
+    case .active:
+      self.invalidState()
+
+    case .ready:
+      self.invalidState()
+
+    case .transientFailure:
       self.invalidState()
     }
   }
@@ -479,7 +491,13 @@ internal class ConnectionManager {
     case .shutdown:
       ()
 
-    case .connecting, .transientFailure:
+    // These cases are purposefully separated: some crash reporting services provide stack traces
+    // which don't include the precondition failure message (which contain the invalid state we were
+    // in). Keeping the cases separate allows us work out the state from the line number.
+    case .connecting:
+      self.invalidState()
+
+    case .transientFailure:
       self.invalidState()
     }
   }
@@ -500,7 +518,19 @@ internal class ConnectionManager {
     case .shutdown:
       ()
 
-    case .idle, .transientFailure, .connecting, .ready:
+    // These cases are purposefully separated: some crash reporting services provide stack traces
+    // which don't include the precondition failure message (which contain the invalid state we were
+    // in). Keeping the cases separate allows us work out the state from the line number.
+    case .idle:
+      self.invalidState()
+
+    case .transientFailure:
+      self.invalidState()
+
+    case .connecting:
+      self.invalidState()
+
+    case .ready:
       self.invalidState()
     }
   }
@@ -523,7 +553,22 @@ internal class ConnectionManager {
     case let .ready(state):
       self.state = .idle(IdleState(configuration: state.configuration))
 
-    case .idle, .connecting, .transientFailure, .shutdown:
+    case .shutdown:
+      // This is expected when the connection is closed by the user: when the channel becomes
+      // inactive and there are no outstanding RPCs, 'idle()' will be called instead of
+      // 'channelInactive()'.
+      ()
+
+    // These cases are purposefully separated: some crash reporting services provide stack traces
+    // which don't include the precondition failure message (which contain the invalid state we were
+    // in). Keeping the cases separate allows us work out the state from the line number.
+    case .idle:
+      self.invalidState()
+
+    case .connecting:
+      self.invalidState()
+
+    case .transientFailure:
       self.invalidState()
     }
   }
@@ -560,7 +605,20 @@ extension ConnectionManager {
       ()
 
     // We can't fail to connect if we aren't trying.
-    case .idle, .active, .ready, .transientFailure:
+    //
+    // These cases are purposefully separated: some crash reporting services provide stack traces
+    // which don't include the precondition failure message (which contain the invalid state we were
+    // in). Keeping the cases separate allows us work out the state from the line number.
+    case .idle:
+      self.invalidState()
+
+    case .active:
+      self.invalidState()
+
+    case .ready:
+      self.invalidState()
+
+    case .transientFailure:
       self.invalidState()
     }
   }
@@ -590,7 +648,16 @@ extension ConnectionManager {
     case .shutdown:
       ()
 
-    case .connecting, .active, .ready:
+    // These cases are purposefully separated: some crash reporting services provide stack traces
+    // which don't include the precondition failure message (which contain the invalid state we were
+    // in). Keeping the cases separate allows us work out the state from the line number.
+    case .connecting:
+      self.invalidState()
+
+    case .active:
+      self.invalidState()
+
+    case .ready:
       self.invalidState()
     }
   }

+ 12 - 2
Sources/GRPC/GRPCIdleHandler.swift

@@ -126,11 +126,21 @@ internal class GRPCIdleHandler: ChannelInboundHandler {
     self.scheduledIdle = nil
 
     switch (self.mode, self.state) {
-    case let (.client(manager), .notReady),
-         let (.client(manager), .ready):
+    case let (.client(manager), .notReady):
       self.state = .closed
       manager.channelInactive()
 
+    case let (.client(manager), .ready):
+      self.state = .closed
+
+      if self.activeStreams == 0 {
+        // We're ready and there are no active streams: we can treat this as the server idling our
+        // connection.
+        manager.idle()
+      } else {
+        manager.channelInactive()
+      }
+
     case (.server, .notReady),
          (.server, .ready),
          (_, .closed):

+ 85 - 17
Tests/GRPCTests/ConnectionManagerTests.swift

@@ -41,26 +41,30 @@ class ConnectionManagerTests: GRPCTestCase {
     from: ConnectivityState,
     to: ConnectivityState,
     timeout: DispatchTimeInterval = .seconds(1),
+    file: StaticString = #file,
+    line: UInt = #line,
     body: () throws -> Result
   ) rethrows -> Result {
     self.recorder.expectChange {
-      XCTAssertEqual($0, Change(from: from, to: to))
+      XCTAssertEqual($0, Change(from: from, to: to), file: file, line: line)
     }
     let result = try body()
-    self.recorder.waitForExpectedChanges(timeout: timeout)
+    self.recorder.waitForExpectedChanges(timeout: timeout, file: file, line: line)
     return result
   }
 
   private func waitForStateChanges<Result>(
     _ changes: [Change],
     timeout: DispatchTimeInterval = .seconds(1),
+    file: StaticString = #file,
+    line: UInt = #line,
     body: () throws -> Result
   ) rethrows -> Result {
     self.recorder.expectChanges(changes.count) {
       XCTAssertEqual($0, changes)
     }
     let result = try body()
-    self.recorder.waitForExpectedChanges(timeout: timeout)
+    self.recorder.waitForExpectedChanges(timeout: timeout, file: file, line: line)
     return result
   }
 }
@@ -115,12 +119,10 @@ extension ConnectionManagerTests {
     }
 
     // Start the connection.
-    let readyChannel: EventLoopFuture<Channel> = self
-      .waitForStateChange(from: .idle, to: .connecting) {
-        let readyChannel = manager.getChannel()
-        self.loop.run()
-        return readyChannel
-      }
+    self.waitForStateChange(from: .idle, to: .connecting) {
+      _ = manager.getChannel()
+      self.loop.run()
+    }
 
     // Setup the real channel and activate it.
     let channel = EmbeddedChannel(
@@ -141,8 +143,10 @@ extension ConnectionManagerTests {
 
     // Close the channel.
     try self.waitForStateChange(from: .ready, to: .shutdown) {
-      // Now the channel should be available: shut it down,
-      XCTAssertNoThrow(try readyChannel.flatMap { $0.close(mode: .all) }.wait())
+      // Now the channel should be available: shut it down.
+      let shutdown = manager.shutdown()
+      self.loop.run()
+      XCTAssertNoThrow(try shutdown.wait())
     }
   }
 
@@ -289,7 +293,9 @@ extension ConnectionManagerTests {
 
     try self.waitForStateChange(from: .connecting, to: .shutdown) {
       // Okay: now close the channel; the `readyChannel` future has not been completed yet.
-      XCTAssertNoThrow(try channel.close(mode: .all).wait())
+      let shutdown = manager.shutdown()
+      self.loop.run()
+      XCTAssertNoThrow(try shutdown.wait())
     }
 
     // We failed to get a channel and we don't have reconnect configured: we should be shutdown and
@@ -590,7 +596,14 @@ extension ConnectionManagerTests {
     // Channel should now be ready.
     XCTAssertNoThrow(try readyChannel.wait())
 
-    // Kill the first channel.
+    // Kill the first channel. But first ensure there's an active RPC, otherwise we'll idle.
+    let streamCreated = NIOHTTP2StreamCreatedEvent(
+      streamID: 1,
+      localInitialWindowSize: nil,
+      remoteInitialWindowSize: nil
+    )
+    firstChannel.pipeline.fireUserInboundEventTriggered(streamCreated)
+
     try self.waitForStateChange(from: .ready, to: .transientFailure) {
       XCTAssertNoThrow(try firstChannel.close().wait())
     }
@@ -865,14 +878,62 @@ extension ConnectionManagerTests {
     XCTAssertNoThrow(try readyChannel.wait())
 
     // Now drop the connection.
-    self.waitForStateChange(from: .ready, to: .shutdown) {
-      channel.pipeline.fireChannelInactive()
+    try self.waitForStateChange(from: .ready, to: .shutdown) {
+      let shutdown = manager.shutdown()
+      self.loop.run()
+      XCTAssertNoThrow(try shutdown.wait())
     }
 
     // Fire a connection idled event, i.e. keepalive timeout has fired. This should be a no-op.
     // Previously this would hit a precondition failure.
     channel.pipeline.fireUserInboundEventTriggered(ConnectionIdledEvent())
   }
+
+  func testCloseWithoutActiveRPCs() throws {
+    let channelPromise = self.loop.makePromise(of: Channel.self)
+    let manager = ConnectionManager.testingOnly(
+      configuration: self.defaultConfiguration,
+      logger: self.logger
+    ) {
+      channelPromise.futureResult
+    }
+
+    // Start the connection.
+    let readyChannel = self.waitForStateChange(
+      from: .idle,
+      to: .connecting
+    ) { () -> EventLoopFuture<Channel> in
+      let readyChannel = manager.getChannel()
+      self.loop.run()
+      return readyChannel
+    }
+
+    // Setup the actual channel and activate it.
+    let channel = EmbeddedChannel(loop: self.loop)
+    XCTAssertNoThrow(try channel.pipeline.addHandlers([
+      GRPCIdleHandler(mode: .client(manager), logger: manager.logger),
+    ]).wait())
+    channelPromise.succeed(channel)
+    self.loop.run()
+
+    let connect = channel.connect(to: try SocketAddress(unixDomainSocketPath: "/ignored"))
+    XCTAssertNoThrow(try connect.wait())
+
+    // "ready" the connection.
+    try self.waitForStateChange(from: .connecting, to: .ready) {
+      let frame = HTTP2Frame(streamID: .rootStream, payload: .settings(.settings([])))
+      XCTAssertNoThrow(try channel.writeInbound(frame))
+    }
+
+    // The channel should now be ready.
+    XCTAssertNoThrow(try readyChannel.wait())
+
+    // Close the channel. There are no active RPCs so we should idle rather than be in the transient
+    // failure state.
+    self.waitForStateChange(from: .ready, to: .idle) {
+      channel.pipeline.fireChannelInactive()
+    }
+  }
 }
 
 internal struct Change: Hashable, CustomStringConvertible {
@@ -956,13 +1017,20 @@ internal class RecordingConnectivityDelegate: ConnectivityStateDelegate {
     }
   }
 
-  func waitForExpectedChanges(timeout: DispatchTimeInterval) {
+  func waitForExpectedChanges(
+    timeout: DispatchTimeInterval,
+    file: StaticString = #file,
+    line: UInt = #line
+  ) {
     let result = self.semaphore.wait(timeout: .now() + timeout)
     switch result {
     case .success:
       ()
     case .timedOut:
-      XCTFail("Timed out before verifying \(self.expectation.count) change(s)")
+      XCTFail(
+        "Timed out before verifying \(self.expectation.count) change(s)",
+        file: file, line: line
+      )
     }
   }
 }

+ 1 - 5
Tests/GRPCTests/GRPCIdleTests.swift

@@ -35,10 +35,6 @@ class GRPCIdleTests: GRPCTestCase {
   }
 
   func doTestIdleTimeout(serverIdle: TimeAmount, clientIdle: TimeAmount) throws {
-    // Is the server idling first? This determines what state change the client should see when the
-    // idle happens.
-    let isServerIdleFirst = serverIdle < clientIdle
-
     let group = MultiThreadedEventLoopGroup(numberOfThreads: 1)
     defer {
       XCTAssertNoThrow(try group.syncShutdownGracefully())
@@ -61,7 +57,7 @@ class GRPCIdleTests: GRPCTestCase {
       XCTAssertEqual(changes, [
         Change(from: .idle, to: .connecting),
         Change(from: .connecting, to: .ready),
-        Change(from: .ready, to: isServerIdleFirst ? .transientFailure : .idle),
+        Change(from: .ready, to: .idle),
       ])
     }
 

+ 1 - 0
Tests/GRPCTests/XCTestManifests.swift

@@ -147,6 +147,7 @@ extension ConnectionManagerTests {
     //   `swift test --generate-linuxmain`
     // to regenerate.
     static let __allTests__ConnectionManagerTests = [
+        ("testCloseWithoutActiveRPCs", testCloseWithoutActiveRPCs),
         ("testConnectAndDisconnect", testConnectAndDisconnect),
         ("testConnectAndIdle", testConnectAndIdle),
         ("testConnectAndThenBecomeInactive", testConnectAndThenBecomeInactive),