SpeechService.swift

/*
 * Copyright 2020, gRPC Authors All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import Foundation
import GRPC
import Logging

typealias Request = Google_Cloud_Speech_V1_StreamingRecognizeRequest
typealias Response = Google_Cloud_Speech_V1_StreamingRecognizeResponse
typealias StreamingRecognizeCall = BidirectionalStreamingCall

final class SpeechService {
  // Track whether we are currently streaming or not
  enum State {
    case idle
    case streaming(StreamingRecognizeCall<Request, Response>)
  }

  // Generated SpeechClient for making calls
  private var client: Google_Cloud_Speech_V1_SpeechClient

  // Track if we are streaming or not
  private var state: State = .idle

  init() {
    precondition(
      !Constants.apiKey.isEmpty,
      "Please refer to the README on how to configure your API key properly."
    )

    // Make an EventLoopGroup for the specific platform (NIOTSEventLoopGroup for iOS);
    // see https://github.com/grpc/grpc-swift/blob/main/docs/apple-platforms.md for more details.
    let group = PlatformSupport.makeEventLoopGroup(loopCount: 1)

    // Set up a logger for debugging.
    var logger = Logger(label: "gRPC", factory: StreamLogHandler.standardOutput(label:))
    logger.logLevel = .debug

    // Create a connection secured with TLS to Google's speech service, running on our `EventLoopGroup`.
    let channel = ClientConnection
      .usingPlatformAppropriateTLS(for: group)
      .withBackgroundActivityLogger(logger)
      .connect(host: "speech.googleapis.com", port: 443)

    // Specify call options to be used for gRPC calls.
    let callOptions = CallOptions(customMetadata: [
      "x-goog-api-key": Constants.apiKey,
    ], logger: logger)

    // Now we have a client!
    self.client = Google_Cloud_Speech_V1_SpeechClient(
      channel: channel,
      defaultCallOptions: callOptions
    )
  }

  func stream(
    _ data: Data,
    completion: ((Google_Cloud_Speech_V1_StreamingRecognizeResponse) -> Void)? = nil
  ) {
    switch self.state {
    case .idle:
      // Initialize the bidirectional stream
      let call = self.client.streamingRecognize { response in
        // Message received from the server; execute the closure provided by the caller
        completion?(response)
      }
      self.state = .streaming(call)

      // Specify audio details
      let config = Google_Cloud_Speech_V1_RecognitionConfig.with {
        $0.encoding = .linear16
        $0.sampleRateHertz = Int32(Constants.sampleRate)
        $0.languageCode = "en-US"
        $0.enableAutomaticPunctuation = true
        $0.metadata = Google_Cloud_Speech_V1_RecognitionMetadata.with {
          $0.interactionType = .dictation
          $0.microphoneDistance = .nearfield
          $0.recordingDeviceType = .smartphone
        }
      }

      // Create the streaming request
      let request = Google_Cloud_Speech_V1_StreamingRecognizeRequest.with {
        $0.streamingConfig = Google_Cloud_Speech_V1_StreamingRecognitionConfig.with {
          $0.config = config
        }
      }

      // Send the first message, which must contain the streaming configuration
      call.sendMessage(request, promise: nil)

      // Build a streaming request that contains the audio data
      let streamAudioDataRequest = Google_Cloud_Speech_V1_StreamingRecognizeRequest.with {
        $0.audioContent = data
      }

      // Send the audio data
      call.sendMessage(streamAudioDataRequest, promise: nil)

    case let .streaming(call):
      // Build a streaming request that contains the audio data
      let streamAudioDataRequest = Google_Cloud_Speech_V1_StreamingRecognizeRequest.with {
        $0.audioContent = data
      }

      // Send the audio data
      call.sendMessage(streamAudioDataRequest, promise: nil)
    }
  }

  func stopStreaming() {
    // Send the end message to the stream
    switch self.state {
    case .idle:
      return
    case let .streaming(stream):
      stream.sendEnd(promise: nil)
      self.state = .idle
    }
  }
}
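
// MARK: - Usage sketch (illustrative addition, not part of the original service)
//
// A minimal example of how a caller might drive `SpeechService`, assuming some
// audio-capture component delivers LINEAR16 PCM chunks as `Data` at
// `Constants.sampleRate`. `TranscriptionController`, `handleAudioChunk(_:)`, and
// `finish()` are hypothetical names introduced here for illustration only; the
// response fields read below follow the generated
// `Google_Cloud_Speech_V1_StreamingRecognizeResponse` message.
final class TranscriptionController {
  private let speechService = SpeechService()

  // Feed each captured LINEAR16 audio chunk into the streaming call.
  func handleAudioChunk(_ chunk: Data) {
    speechService.stream(chunk) { response in
      // Each response carries zero or more interim or final results.
      for result in response.results {
        if let best = result.alternatives.first {
          print("Transcript (final: \(result.isFinal)): \(best.transcript)")
        }
      }
    }
  }

  // Call when the user stops recording to close the stream cleanly.
  func finish() {
    speechService.stopStreaming()
  }
}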