// google/cloud/language/v1beta1/language_service.proto
// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";

package google.cloud.language.v1beta1;

import "google/api/annotations.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/language/v1beta1";
option java_multiple_files = true;
option java_outer_classname = "LanguageServiceProto";
option java_package = "com.google.cloud.language.v1beta1";
  21. // Provides text analysis operations such as sentiment analysis and entity
  22. // recognition.
  23. service LanguageService {
  24. // Analyzes the sentiment of the provided text.
  25. rpc AnalyzeSentiment(AnalyzeSentimentRequest) returns (AnalyzeSentimentResponse) {
  26. option (google.api.http) = { post: "/v1beta1/documents:analyzeSentiment" body: "*" };
  27. }
  28. // Finds named entities (currently finds proper names) in the text,
  29. // entity types, salience, mentions for each entity, and other properties.
  30. rpc AnalyzeEntities(AnalyzeEntitiesRequest) returns (AnalyzeEntitiesResponse) {
  31. option (google.api.http) = { post: "/v1beta1/documents:analyzeEntities" body: "*" };
  32. }
  33. // Advanced API that analyzes the document and provides a full set of text
  34. // annotations, including semantic, syntactic, and sentiment information. This
  35. // API is intended for users who are familiar with machine learning and need
  36. // in-depth text features to build upon.
  37. rpc AnnotateText(AnnotateTextRequest) returns (AnnotateTextResponse) {
  38. option (google.api.http) = { post: "/v1beta1/documents:annotateText" body: "*" };
  39. }
  40. }
  41. // ################################################################ #
  42. //
  43. // Represents the input to API methods.
  44. message Document {
  45. // The document types enum.
  46. enum Type {
  47. // The content type is not specified.
  48. TYPE_UNSPECIFIED = 0;
  49. // Plain text
  50. PLAIN_TEXT = 1;
  51. // HTML
  52. HTML = 2;
  53. }
  54. // Required. If the type is not set or is `TYPE_UNSPECIFIED`,
  55. // returns an `INVALID_ARGUMENT` error.
  56. Type type = 1;
  57. // The source of the document: a string containing the content or a
  58. // Google Cloud Storage URI.
  59. oneof source {
  60. // The content of the input in string format.
  61. string content = 2;
  62. // The Google Cloud Storage URI where the file content is located.
  63. string gcs_content_uri = 3;
  64. }
  65. // The language of the document (if not specified, the language is
  66. // automatically detected). Both ISO and BCP-47 language codes are
  67. // accepted.<br>
  68. // **Current Language Restrictions:**
  69. //
  70. // * Only English, Spanish, and Japanese textual content
  71. // are supported, with the following additional restriction:
  72. // * `analyzeSentiment` only supports English text.
  73. // If the language (either specified by the caller or automatically detected)
  74. // is not supported by the called API method, an `INVALID_ARGUMENT` error
  75. // is returned.
  76. string language = 4;
  77. }
  78. // Represents a sentence in the input document.
  79. message Sentence {
  80. // The sentence text.
  81. TextSpan text = 1;
  82. }
  83. // Represents a phrase in the text that is a known entity, such as
  84. // a person, an organization, or location. The API associates information, such
  85. // as salience and mentions, with entities.
  86. message Entity {
  87. // The type of the entity.
  88. enum Type {
  89. // Unknown
  90. UNKNOWN = 0;
  91. // Person
  92. PERSON = 1;
  93. // Location
  94. LOCATION = 2;
  95. // Organization
  96. ORGANIZATION = 3;
  97. // Event
  98. EVENT = 4;
  99. // Work of art
  100. WORK_OF_ART = 5;
  101. // Consumer goods
  102. CONSUMER_GOOD = 6;
  103. // Other types
  104. OTHER = 7;
  105. }
  106. // The representative name for the entity.
  107. string name = 1;
  108. // The entity type.
  109. Type type = 2;
  110. // Metadata associated with the entity.
  111. //
  112. // Currently, only Wikipedia URLs are provided, if available.
  113. // The associated key is "wikipedia_url".
  114. map<string, string> metadata = 3;
  115. // The salience score associated with the entity in the [0, 1.0] range.
  116. //
  117. // The salience score for an entity provides information about the
  118. // importance or centrality of that entity to the entire document text.
  119. // Scores closer to 0 are less salient, while scores closer to 1.0 are highly
  120. // salient.
  121. float salience = 4;
  122. // The mentions of this entity in the input document. The API currently
  123. // supports proper noun mentions.
  124. repeated EntityMention mentions = 5;
  125. }
  126. // Represents the smallest syntactic building block of the text.
  127. message Token {
  128. // The token text.
  129. TextSpan text = 1;
  130. // Parts of speech tag for this token.
  131. PartOfSpeech part_of_speech = 2;
  132. // Dependency tree parse for this token.
  133. DependencyEdge dependency_edge = 3;
  134. // [Lemma](https://en.wikipedia.org/wiki/Lemma_(morphology))
  135. // of the token.
  136. string lemma = 4;
  137. }
  138. // Represents the feeling associated with the entire text or entities in
  139. // the text.
  140. message Sentiment {
  141. // Polarity of the sentiment in the [-1.0, 1.0] range. Larger numbers
  142. // represent more positive sentiments.
  143. float polarity = 1;
  144. // A non-negative number in the [0, +inf) range, which represents
  145. // the absolute magnitude of sentiment regardless of polarity (positive or
  146. // negative).
  147. float magnitude = 2;
  148. }
  149. // Represents part of speech information for a token.
  150. message PartOfSpeech {
  151. // The part of speech tags enum.
  152. enum Tag {
  153. // Unknown
  154. UNKNOWN = 0;
  155. // Adjective
  156. ADJ = 1;
  157. // Adposition (preposition and postposition)
  158. ADP = 2;
  159. // Adverb
  160. ADV = 3;
  161. // Conjunction
  162. CONJ = 4;
  163. // Determiner
  164. DET = 5;
  165. // Noun (common and proper)
  166. NOUN = 6;
  167. // Cardinal number
  168. NUM = 7;
  169. // Pronoun
  170. PRON = 8;
  171. // Particle or other function word
  172. PRT = 9;
  173. // Punctuation
  174. PUNCT = 10;
  175. // Verb (all tenses and modes)
  176. VERB = 11;
  177. // Other: foreign words, typos, abbreviations
  178. X = 12;
  179. // Affix
  180. AFFIX = 13;
  181. }
  182. // The part of speech tag.
  183. Tag tag = 1;
  184. }
  185. // Represents dependency parse tree information for a token.
  186. message DependencyEdge {
  187. // The parse label enum for the token.
  188. enum Label {
  189. // Unknown
  190. UNKNOWN = 0;
  191. // Abbreviation modifier
  192. ABBREV = 1;
  193. // Adjectival complement
  194. ACOMP = 2;
  195. // Adverbial clause modifier
  196. ADVCL = 3;
  197. // Adverbial modifier
  198. ADVMOD = 4;
  199. // Adjectival modifier of an NP
  200. AMOD = 5;
  201. // Appositional modifier of an NP
  202. APPOS = 6;
  203. // Attribute dependent of a copular verb
  204. ATTR = 7;
  205. // Auxiliary (non-main) verb
  206. AUX = 8;
  207. // Passive auxiliary
  208. AUXPASS = 9;
  209. // Coordinating conjunction
  210. CC = 10;
  211. // Clausal complement of a verb or adjective
  212. CCOMP = 11;
  213. // Conjunct
  214. CONJ = 12;
  215. // Clausal subject
  216. CSUBJ = 13;
  217. // Clausal passive subject
  218. CSUBJPASS = 14;
  219. // Dependency (unable to determine)
  220. DEP = 15;
  221. // Determiner
  222. DET = 16;
  223. // Discourse
  224. DISCOURSE = 17;
  225. // Direct object
  226. DOBJ = 18;
  227. // Expletive
  228. EXPL = 19;
  229. // Goes with (part of a word in a text not well edited)
  230. GOESWITH = 20;
  231. // Indirect object
  232. IOBJ = 21;
  233. // Marker (word introducing a subordinate clause)
  234. MARK = 22;
  235. // Multi-word expression
  236. MWE = 23;
  237. // Multi-word verbal expression
  238. MWV = 24;
  239. // Negation modifier
  240. NEG = 25;
  241. // Noun compound modifier
  242. NN = 26;
  243. // Noun phrase used as an adverbial modifier
  244. NPADVMOD = 27;
  245. // Nominal subject
  246. NSUBJ = 28;
  247. // Passive nominal subject
  248. NSUBJPASS = 29;
  249. // Numeric modifier of a noun
  250. NUM = 30;
  251. // Element of compound number
  252. NUMBER = 31;
  253. // Punctuation mark
  254. P = 32;
  255. // Parataxis relation
  256. PARATAXIS = 33;
  257. // Participial modifier
  258. PARTMOD = 34;
  259. // The complement of a preposition is a clause
  260. PCOMP = 35;
  261. // Object of a preposition
  262. POBJ = 36;
  263. // Possession modifier
  264. POSS = 37;
  265. // Postverbal negative particle
  266. POSTNEG = 38;
  267. // Predicate complement
  268. PRECOMP = 39;
  269. // Preconjunt
  270. PRECONJ = 40;
  271. // Predeterminer
  272. PREDET = 41;
  273. // Prefix
  274. PREF = 42;
  275. // Prepositional modifier
  276. PREP = 43;
  277. // The relationship between a verb and verbal morpheme
  278. PRONL = 44;
  279. // Particle
  280. PRT = 45;
  281. // Associative or possessive marker
  282. PS = 46;
  283. // Quantifier phrase modifier
  284. QUANTMOD = 47;
  285. // Relative clause modifier
  286. RCMOD = 48;
  287. // Complementizer in relative clause
  288. RCMODREL = 49;
  289. // Ellipsis without a preceding predicate
  290. RDROP = 50;
  291. // Referent
  292. REF = 51;
  293. // Remnant
  294. REMNANT = 52;
  295. // Reparandum
  296. REPARANDUM = 53;
  297. // Root
  298. ROOT = 54;
  299. // Suffix specifying a unit of number
  300. SNUM = 55;
  301. // Suffix
  302. SUFF = 56;
  303. // Temporal modifier
  304. TMOD = 57;
  305. // Topic marker
  306. TOPIC = 58;
  307. // Clause headed by an infinite form of the verb that modifies a noun
  308. VMOD = 59;
  309. // Vocative
  310. VOCATIVE = 60;
  311. // Open clausal complement
  312. XCOMP = 61;
  313. // Name suffix
  314. SUFFIX = 62;
  315. // Name title
  316. TITLE = 63;
  317. // Adverbial phrase modifier
  318. ADVPHMOD = 64;
  319. // Causative auxiliary
  320. AUXCAUS = 65;
  321. // Helper auxiliary
  322. AUXVV = 66;
  323. // Rentaishi (Prenominal modifier)
  324. DTMOD = 67;
  325. // Foreign words
  326. FOREIGN = 68;
  327. // Keyword
  328. KW = 69;
  329. // List for chains of comparable items
  330. LIST = 70;
  331. // Nominalized clause
  332. NOMC = 71;
  333. // Nominalized clausal subject
  334. NOMCSUBJ = 72;
  335. // Nominalized clausal passive
  336. NOMCSUBJPASS = 73;
  337. // Compound of numeric modifier
  338. NUMC = 74;
  339. // Copula
  340. COP = 75;
  341. // Dislocated relation (for fronted/topicalized elements)
  342. DISLOCATED = 76;
  343. }
  344. // Represents the head of this token in the dependency tree.
  345. // This is the index of the token which has an arc going to this token.
  346. // The index is the position of the token in the array of tokens returned
  347. // by the API method. If this token is a root token, then the
  348. // `head_token_index` is its own index.
  349. int32 head_token_index = 1;
  350. // The parse label for the token.
  351. Label label = 2;
  352. }
  353. // Represents a mention for an entity in the text. Currently, proper noun
  354. // mentions are supported.
  355. message EntityMention {
  356. // The mention text.
  357. TextSpan text = 1;
  358. }
  359. // Represents an output piece of text.
  360. message TextSpan {
  361. // The content of the output text.
  362. string content = 1;
  363. // The API calculates the beginning offset of the content in the original
  364. // document according to the [EncodingType][google.cloud.language.v1beta1.EncodingType] specified in the API request.
  365. int32 begin_offset = 2;
  366. }
  367. // The sentiment analysis request message.
  368. message AnalyzeSentimentRequest {
  369. // Input document. Currently, `analyzeSentiment` only supports English text
  370. // ([Document.language][google.cloud.language.v1beta1.Document.language]="EN").
  371. Document document = 1;
  372. }
  373. // The sentiment analysis response message.
  374. message AnalyzeSentimentResponse {
  375. // The overall sentiment of the input document.
  376. Sentiment document_sentiment = 1;
  377. // The language of the text, which will be the same as the language specified
  378. // in the request or, if not specified, the automatically-detected language.
  379. string language = 2;
  380. }
  381. // The entity analysis request message.
  382. message AnalyzeEntitiesRequest {
  383. // Input document.
  384. Document document = 1;
  385. // The encoding type used by the API to calculate offsets.
  386. EncodingType encoding_type = 2;
  387. }
  388. // The entity analysis response message.
  389. message AnalyzeEntitiesResponse {
  390. // The recognized entities in the input document.
  391. repeated Entity entities = 1;
  392. // The language of the text, which will be the same as the language specified
  393. // in the request or, if not specified, the automatically-detected language.
  394. string language = 2;
  395. }
  396. // The request message for the advanced text annotation API, which performs all
  397. // the above plus syntactic analysis.
  398. message AnnotateTextRequest {
  399. // All available features for sentiment, syntax, and semantic analysis.
  400. // Setting each one to true will enable that specific analysis for the input.
  401. message Features {
  402. // Extract syntax information.
  403. bool extract_syntax = 1;
  404. // Extract entities.
  405. bool extract_entities = 2;
  406. // Extract document-level sentiment.
  407. bool extract_document_sentiment = 3;
  408. }
  409. // Input document.
  410. Document document = 1;
  411. // The enabled features.
  412. Features features = 2;
  413. // The encoding type used by the API to calculate offsets.
  414. EncodingType encoding_type = 3;
  415. }
  416. // The text annotations response message.
  417. message AnnotateTextResponse {
  418. // Sentences in the input document. Populated if the user enables
  419. // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_syntax].
  420. repeated Sentence sentences = 1;
  421. // Tokens, along with their syntactic information, in the input document.
  422. // Populated if the user enables
  423. // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_syntax].
  424. repeated Token tokens = 2;
  425. // Entities, along with their semantic information, in the input document.
  426. // Populated if the user enables
  427. // [AnnotateTextRequest.Features.extract_entities][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_entities].
  428. repeated Entity entities = 3;
  429. // The overall sentiment for the document. Populated if the user enables
  430. // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_document_sentiment].
  431. Sentiment document_sentiment = 4;
  432. // The language of the text, which will be the same as the language specified
  433. // in the request or, if not specified, the automatically-detected language.
  434. string language = 5;
  435. }
  436. // Represents the text encoding that the caller uses to process the output.
  437. // Providing an `EncodingType` is recommended because the API provides the
  438. // beginning offsets for various outputs, such as tokens and mentions, and
  439. // languages that natively use different text encodings may access offsets
  440. // differently.
  441. enum EncodingType {
  442. // If `EncodingType` is not specified, encoding-dependent information (such as
  443. // `begin_offset`) will be set at `-1`.
  444. NONE = 0;
  445. // Encoding-dependent information (such as `begin_offset`) is calculated based
  446. // on the UTF-8 encoding of the input. C++ and Go are examples of languages
  447. // that use this encoding natively.
  448. UTF8 = 1;
  449. // Encoding-dependent information (such as `begin_offset`) is calculated based
  450. // on the UTF-16 encoding of the input. Java and Javascript are examples of
  451. // languages that use this encoding natively.
  452. UTF16 = 2;
  453. // Encoding-dependent information (such as `begin_offset`) is calculated based
  454. // on the UTF-32 encoding of the input. Python is an example of a language
  455. // that uses this encoding natively.
  456. UTF32 = 3;
  457. }