// SpeechService.swift
/*
 * Copyright 2020, gRPC Authors All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import GRPC
import Logging

// Shorthand for the generated Cloud Speech streaming request/response messages.
typealias Request = Google_Cloud_Speech_V1_StreamingRecognizeRequest
typealias Response = Google_Cloud_Speech_V1_StreamingRecognizeResponse
// A bidirectional gRPC call; used below as StreamingRecognizeCall<Request, Response>.
typealias StreamingRecognizeCall = BidirectionalStreamingCall
  21. final class SpeechService {
  22. // Track whether we are currently streaming or not
  23. enum State {
  24. case idle
  25. case streaming(StreamingRecognizeCall<Request, Response>)
  26. }
  27. // Generated SpeechClient for making calls
  28. private var client: Google_Cloud_Speech_V1_SpeechClient
  29. // Track if we are streaming or not
  30. private var state: State = .idle
  31. init() {
  32. precondition(
  33. !Constants.apiKey.isEmpty,
  34. "Please refer to the README on how to configure your API Key properly."
  35. )
  36. // Make EventLoopGroup for the specific platform (NIOTSEventLoopGroup for iOS)
  37. // see https://github.com/grpc/grpc-swift/blob/main/docs/apple-platforms.md for more details
  38. let group = PlatformSupport.makeEventLoopGroup(loopCount: 1)
  39. // Setup a logger for debugging.
  40. var logger = Logger(label: "gRPC", factory: StreamLogHandler.standardOutput(label:))
  41. logger.logLevel = .debug
  42. // Create a connection secured with TLS to Google's speech service running on our `EventLoopGroup`
  43. let channel = ClientConnection
  44. .secure(group: group)
  45. .withBackgroundActivityLogger(logger)
  46. .connect(host: "speech.googleapis.com", port: 443)
  47. // Specify call options to be used for gRPC calls
  48. let callOptions = CallOptions(customMetadata: [
  49. "x-goog-api-key": Constants.apiKey,
  50. ], logger: logger)
  51. // Now we have a client!
  52. self.client = Google_Cloud_Speech_V1_SpeechClient(
  53. channel: channel,
  54. defaultCallOptions: callOptions
  55. )
  56. }
  57. func stream(_ data: Data,
  58. completion: ((Google_Cloud_Speech_V1_StreamingRecognizeResponse) -> Void)? = nil) {
  59. switch self.state {
  60. case .idle:
  61. // Initialize the bidirectional stream
  62. let call = self.client.streamingRecognize { response in
  63. // Message received from Server, execute provided closure from caller
  64. completion?(response)
  65. }
  66. self.state = .streaming(call)
  67. // Specify audio details
  68. let config = Google_Cloud_Speech_V1_RecognitionConfig.with {
  69. $0.encoding = .linear16
  70. $0.sampleRateHertz = Int32(Constants.sampleRate)
  71. $0.languageCode = "en-US"
  72. $0.enableAutomaticPunctuation = true
  73. $0.metadata = Google_Cloud_Speech_V1_RecognitionMetadata.with {
  74. $0.interactionType = .dictation
  75. $0.microphoneDistance = .nearfield
  76. $0.recordingDeviceType = .smartphone
  77. }
  78. }
  79. // Create streaming request
  80. let request = Google_Cloud_Speech_V1_StreamingRecognizeRequest.with {
  81. $0.streamingConfig = Google_Cloud_Speech_V1_StreamingRecognitionConfig.with {
  82. $0.config = config
  83. }
  84. }
  85. // Send first message consisting of the streaming request details
  86. call.sendMessage(request, promise: nil)
  87. // Stream request to send that contains the audio details
  88. let streamAudioDataRequest = Google_Cloud_Speech_V1_StreamingRecognizeRequest.with {
  89. $0.audioContent = data
  90. }
  91. // Send audio data
  92. call.sendMessage(streamAudioDataRequest, promise: nil)
  93. case let .streaming(call):
  94. // Stream request to send that contains the audio details
  95. let streamAudioDataRequest = Google_Cloud_Speech_V1_StreamingRecognizeRequest.with {
  96. $0.audioContent = data
  97. }
  98. // Send audio data
  99. call.sendMessage(streamAudioDataRequest, promise: nil)
  100. }
  101. }
  102. func stopStreaming() {
  103. // Send end message to the stream
  104. switch self.state {
  105. case .idle:
  106. return
  107. case let .streaming(stream):
  108. stream.sendEnd(promise: nil)
  109. self.state = .idle
  110. }
  111. }
  112. }