// SpeechService.swift
  1. /*
  2. * Copyright 2020, gRPC Authors All rights reserved.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. import GRPC
  17. import Logging
// Convenience aliases for the generated streaming-recognize message types.
typealias Request = Google_Cloud_Speech_V1_StreamingRecognizeRequest
typealias Response = Google_Cloud_Speech_V1_StreamingRecognizeResponse
// A bidirectional gRPC call carrying speech requests up and recognition results back.
typealias StreamingRecognizeCall = BidirectionalStreamingCall
  21. final class SpeechService {
  22. // Track whether we are currently streaming or not
  23. enum State {
  24. case idle
  25. case streaming(StreamingRecognizeCall<Request, Response>)
  26. }
  27. // Generated SpeechClient for making calls
  28. private var client: Google_Cloud_Speech_V1_SpeechClient
  29. // Track if we are streaming or not
  30. private var state: State = .idle
  31. init() {
  32. precondition(
  33. !Constants.apiKey.isEmpty,
  34. "Please refer to the README on how to configure your API Key properly."
  35. )
  36. // Make EventLoopGroup for the specific platform (NIOTSEventLoopGroup for iOS)
  37. // see https://github.com/grpc/grpc-swift/blob/main/docs/apple-platforms.md for more details
  38. let group = PlatformSupport.makeEventLoopGroup(loopCount: 1)
  39. // Setup a logger for debugging.
  40. var logger = Logger(label: "gRPC", factory: StreamLogHandler.standardOutput(label:))
  41. logger.logLevel = .debug
  42. // Create a connection secured with TLS to Google's speech service running on our `EventLoopGroup`
  43. let channel = ClientConnection
  44. .secure(group: group)
  45. .withBackgroundActivityLogger(logger)
  46. .connect(host: "speech.googleapis.com", port: 443)
  47. // Specify call options to be used for gRPC calls
  48. let callOptions = CallOptions(customMetadata: [
  49. "x-goog-api-key": Constants.apiKey,
  50. ], logger: logger)
  51. // Now we have a client!
  52. self.client = Google_Cloud_Speech_V1_SpeechClient(
  53. channel: channel,
  54. defaultCallOptions: callOptions
  55. )
  56. }
  57. func stream(
  58. _ data: Data,
  59. completion: ((Google_Cloud_Speech_V1_StreamingRecognizeResponse) -> Void)? = nil
  60. ) {
  61. switch self.state {
  62. case .idle:
  63. // Initialize the bidirectional stream
  64. let call = self.client.streamingRecognize { response in
  65. // Message received from Server, execute provided closure from caller
  66. completion?(response)
  67. }
  68. self.state = .streaming(call)
  69. // Specify audio details
  70. let config = Google_Cloud_Speech_V1_RecognitionConfig.with {
  71. $0.encoding = .linear16
  72. $0.sampleRateHertz = Int32(Constants.sampleRate)
  73. $0.languageCode = "en-US"
  74. $0.enableAutomaticPunctuation = true
  75. $0.metadata = Google_Cloud_Speech_V1_RecognitionMetadata.with {
  76. $0.interactionType = .dictation
  77. $0.microphoneDistance = .nearfield
  78. $0.recordingDeviceType = .smartphone
  79. }
  80. }
  81. // Create streaming request
  82. let request = Google_Cloud_Speech_V1_StreamingRecognizeRequest.with {
  83. $0.streamingConfig = Google_Cloud_Speech_V1_StreamingRecognitionConfig.with {
  84. $0.config = config
  85. }
  86. }
  87. // Send first message consisting of the streaming request details
  88. call.sendMessage(request, promise: nil)
  89. // Stream request to send that contains the audio details
  90. let streamAudioDataRequest = Google_Cloud_Speech_V1_StreamingRecognizeRequest.with {
  91. $0.audioContent = data
  92. }
  93. // Send audio data
  94. call.sendMessage(streamAudioDataRequest, promise: nil)
  95. case let .streaming(call):
  96. // Stream request to send that contains the audio details
  97. let streamAudioDataRequest = Google_Cloud_Speech_V1_StreamingRecognizeRequest.with {
  98. $0.audioContent = data
  99. }
  100. // Send audio data
  101. call.sendMessage(streamAudioDataRequest, promise: nil)
  102. }
  103. }
  104. func stopStreaming() {
  105. // Send end message to the stream
  106. switch self.state {
  107. case .idle:
  108. return
  109. case let .streaming(stream):
  110. stream.sendEnd(promise: nil)
  111. self.state = .idle
  112. }
  113. }
  114. }