// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";

package google.cloud.language.v1beta1;

// Provides the `google.api.http` method option.
import "google/api/annotations.proto";

// Code generation settings for Go and Java.
option go_package = "google.golang.org/genproto/googleapis/cloud/language/v1beta1";
option java_multiple_files = true;
option java_outer_classname = "LanguageServiceProto";
option java_package = "com.google.cloud.language.v1beta1";

// Exposes text analysis operations, including sentiment analysis and entity
// recognition.
service LanguageService {
  // Analyzes the sentiment of the provided text.
  rpc AnalyzeSentiment(AnalyzeSentimentRequest)
      returns (AnalyzeSentimentResponse) {
    option (google.api.http) = {
      post: "/v1beta1/documents:analyzeSentiment"
      body: "*"
    };
  }

  // Finds named entities in the text (currently proper names), together with
  // the entity types, salience, mentions for each entity, and other
  // properties.
  rpc AnalyzeEntities(AnalyzeEntitiesRequest)
      returns (AnalyzeEntitiesResponse) {
    option (google.api.http) = {
      post: "/v1beta1/documents:analyzeEntities"
      body: "*"
    };
  }

  // Advanced API that analyzes the document and returns a full set of text
  // annotations, including semantic, syntactic, and sentiment information.
  // It is intended for users who are familiar with machine learning and need
  // in-depth text features to build upon.
  rpc AnnotateText(AnnotateTextRequest) returns (AnnotateTextResponse) {
    option (google.api.http) = {
      post: "/v1beta1/documents:annotateText"
      body: "*"
    };
  }
}

// The input that API methods operate on.
message Document {
  // The kinds of document content.
  enum Type {
    // No content type was specified.
    TYPE_UNSPECIFIED = 0;

    // Plain text.
    PLAIN_TEXT = 1;

    // HTML.
    HTML = 2;
  }

  // Required. An `INVALID_ARGUMENT` error is returned if the type is not set
  // or is `TYPE_UNSPECIFIED`.
  Type type = 1;

  // Where the document comes from: a string holding the content, or a
  // Google Cloud Storage URI.
  oneof source {
    // The input content, given as a string.
    string content = 2;

    // The Google Cloud Storage URI at which the file content is located.
    string gcs_content_uri = 3;
  }

  // The language of the document. If it is not specified, the language is
  // detected automatically. Both ISO and BCP-47 language codes are
  // accepted.<br>
  // **Current Language Restrictions:**
  //
  //  * Only English, Spanish, and Japanese textual content
  //    are supported, with the following additional restriction:
  //    * `analyzeSentiment` only supports English text.
  //
  // An `INVALID_ARGUMENT` error is returned if the language (whether
  // specified by the caller or automatically detected) is not supported by
  // the called API method.
  string language = 4;
}

// A single sentence of the input document.
message Sentence {
  // The text of the sentence.
  TextSpan text = 1;
}

// A phrase in the text that is a known entity, such as a person, an
// organization, or a location. The API associates information such as
// salience and mentions with entities.
message Entity {
  // The type of the entity.
  enum Type {
    // Unknown.
    UNKNOWN = 0;

    // Person.
    PERSON = 1;

    // Location.
    LOCATION = 2;

    // Organization.
    ORGANIZATION = 3;

    // Event.
    EVENT = 4;

    // Work of art.
    WORK_OF_ART = 5;

    // Consumer goods.
    CONSUMER_GOOD = 6;

    // Other types.
    OTHER = 7;
  }

  // The representative name of the entity.
  string name = 1;

  // The type of the entity.
  Type type = 2;

  // Metadata associated with the entity.
  //
  // Currently only Wikipedia URLs are provided, when available, under the
  // key "wikipedia_url".
  map<string, string> metadata = 3;

  // The salience score of the entity, in the [0, 1.0] range.
  //
  // Salience indicates how important or central the entity is to the entire
  // document text. Scores closer to 0 are less salient, while scores closer
  // to 1.0 are highly salient.
  float salience = 4;

  // The places where this entity is mentioned in the input document. The API
  // currently supports proper noun mentions.
  repeated EntityMention mentions = 5;
}

// The smallest syntactic building block of the text.
message Token {
  // The text of the token.
  TextSpan text = 1;

  // The part of speech tag for this token.
  PartOfSpeech part_of_speech = 2;

  // The dependency tree parse for this token.
  DependencyEdge dependency_edge = 3;

  // The [lemma](https://en.wikipedia.org/wiki/Lemma_(morphology)) of the
  // token.
  string lemma = 4;
}

// The feeling associated with the entire text or with entities in the text.
message Sentiment {
  // The polarity of the sentiment, in the [-1.0, 1.0] range. Larger numbers
  // represent more positive sentiments.
  float polarity = 1;

  // The absolute magnitude of the sentiment, regardless of polarity (positive
  // or negative). It is a non-negative number in the [0, +inf) range.
  float magnitude = 2;
}

// Part of speech information for a token.
message PartOfSpeech {
  // The available part of speech tags.
  enum Tag {
    // Unknown.
    UNKNOWN = 0;
    // Adjective.
    ADJ = 1;
    // Adposition (preposition and postposition).
    ADP = 2;
    // Adverb.
    ADV = 3;
    // Conjunction.
    CONJ = 4;
    // Determiner.
    DET = 5;
    // Noun (common and proper).
    NOUN = 6;
    // Cardinal number.
    NUM = 7;
    // Pronoun.
    PRON = 8;
    // Particle or other function word.
    PRT = 9;
    // Punctuation.
    PUNCT = 10;
    // Verb (all tenses and modes).
    VERB = 11;
    // Other: foreign words, typos, abbreviations.
    X = 12;
    // Affix.
    AFFIX = 13;
  }

  // The part of speech tag assigned to the token.
  Tag tag = 1;
}

// Dependency parse tree information for a token.
message DependencyEdge {
  // The available parse labels for a token.
  enum Label {
    // Unknown.
    UNKNOWN = 0;
    // Abbreviation modifier.
    ABBREV = 1;
    // Adjectival complement.
    ACOMP = 2;
    // Adverbial clause modifier.
    ADVCL = 3;
    // Adverbial modifier.
    ADVMOD = 4;
    // Adjectival modifier of an NP.
    AMOD = 5;
    // Appositional modifier of an NP.
    APPOS = 6;
    // Attribute dependent of a copular verb.
    ATTR = 7;
    // Auxiliary (non-main) verb.
    AUX = 8;
    // Passive auxiliary.
    AUXPASS = 9;
    // Coordinating conjunction.
    CC = 10;
    // Clausal complement of a verb or adjective.
    CCOMP = 11;
    // Conjunct.
    CONJ = 12;
    // Clausal subject.
    CSUBJ = 13;
    // Clausal passive subject.
    CSUBJPASS = 14;
    // Dependency (unable to determine).
    DEP = 15;
    // Determiner.
    DET = 16;
    // Discourse.
    DISCOURSE = 17;
    // Direct object.
    DOBJ = 18;
    // Expletive.
    EXPL = 19;
    // Goes with (part of a word in a text not well edited).
    GOESWITH = 20;
    // Indirect object.
    IOBJ = 21;
    // Marker (word introducing a subordinate clause).
    MARK = 22;
    // Multi-word expression.
    MWE = 23;
    // Multi-word verbal expression.
    MWV = 24;
    // Negation modifier.
    NEG = 25;
    // Noun compound modifier.
    NN = 26;
    // Noun phrase used as an adverbial modifier.
    NPADVMOD = 27;
    // Nominal subject.
    NSUBJ = 28;
    // Passive nominal subject.
    NSUBJPASS = 29;
    // Numeric modifier of a noun.
    NUM = 30;
    // Element of compound number.
    NUMBER = 31;
    // Punctuation mark.
    P = 32;
    // Parataxis relation.
    PARATAXIS = 33;
    // Participial modifier.
    PARTMOD = 34;
    // The complement of a preposition is a clause.
    PCOMP = 35;
    // Object of a preposition.
    POBJ = 36;
    // Possession modifier.
    POSS = 37;
    // Postverbal negative particle.
    POSTNEG = 38;
    // Predicate complement.
    PRECOMP = 39;
    // Preconjunct.
    PRECONJ = 40;
    // Predeterminer.
    PREDET = 41;
    // Prefix.
    PREF = 42;
    // Prepositional modifier.
    PREP = 43;
    // The relationship between a verb and verbal morpheme.
    PRONL = 44;
    // Particle.
    PRT = 45;
    // Associative or possessive marker.
    PS = 46;
    // Quantifier phrase modifier.
    QUANTMOD = 47;
    // Relative clause modifier.
    RCMOD = 48;
    // Complementizer in relative clause.
    RCMODREL = 49;
    // Ellipsis without a preceding predicate.
    RDROP = 50;
    // Referent.
    REF = 51;
    // Remnant.
    REMNANT = 52;
    // Reparandum.
    REPARANDUM = 53;
    // Root.
    ROOT = 54;
    // Suffix specifying a unit of number.
    SNUM = 55;
    // Suffix.
    SUFF = 56;
    // Temporal modifier.
    TMOD = 57;
    // Topic marker.
    TOPIC = 58;
    // Clause headed by an infinite form of the verb that modifies a noun.
    VMOD = 59;
    // Vocative.
    VOCATIVE = 60;
    // Open clausal complement.
    XCOMP = 61;
    // Name suffix.
    SUFFIX = 62;
    // Name title.
    TITLE = 63;
    // Adverbial phrase modifier.
    ADVPHMOD = 64;
    // Causative auxiliary.
    AUXCAUS = 65;
    // Helper auxiliary.
    AUXVV = 66;
    // Rentaishi (Prenominal modifier).
    DTMOD = 67;
    // Foreign words.
    FOREIGN = 68;
    // Keyword.
    KW = 69;
    // List for chains of comparable items.
    LIST = 70;
    // Nominalized clause.
    NOMC = 71;
    // Nominalized clausal subject.
    NOMCSUBJ = 72;
    // Nominalized clausal passive.
    NOMCSUBJPASS = 73;
    // Compound of numeric modifier.
    NUMC = 74;
    // Copula.
    COP = 75;
    // Dislocated relation (for fronted/topicalized elements).
    DISLOCATED = 76;
  }

  // The head of this token in the dependency tree, given as the index of the
  // token that has an arc going to this token. The index is the token's
  // position in the array of tokens returned by the API method. For a root
  // token, `head_token_index` is the token's own index.
  int32 head_token_index = 1;

  // The parse label of the token.
  Label label = 2;
}

// A mention of an entity in the text. Proper noun mentions are currently
// supported.
message EntityMention {
  // The text of the mention.
  TextSpan text = 1;
}

// A piece of output text.
message TextSpan {
  // The content of the output text.
  string content = 1;

  // The beginning offset of the content in the original document. The API
  // calculates it according to the
  // [EncodingType][google.cloud.language.v1beta1.EncodingType] specified in
  // the API request.
  int32 begin_offset = 2;
}

// Request message for sentiment analysis.
message AnalyzeSentimentRequest {
  // The input document. `analyzeSentiment` currently supports English text
  // only
  // ([Document.language][google.cloud.language.v1beta1.Document.language]="EN").
  Document document = 1;
}

// Response message for sentiment analysis.
message AnalyzeSentimentResponse {
  // The overall sentiment of the input document.
  Sentiment document_sentiment = 1;

  // The language of the text. This is the language specified in the request
  // or, if none was specified, the automatically-detected language.
  string language = 2;
}

// Request message for entity analysis.
message AnalyzeEntitiesRequest {
  // The input document.
  Document document = 1;

  // The encoding type that the API uses to calculate offsets.
  EncodingType encoding_type = 2;
}

// Response message for entity analysis.
message AnalyzeEntitiesResponse {
  // The entities recognized in the input document.
  repeated Entity entities = 1;

  // The language of the text. This is the language specified in the request
  // or, if none was specified, the automatically-detected language.
  string language = 2;
}

// Request message for the advanced text annotation API, which covers
// sentiment and entity analysis plus syntactic analysis.
message AnnotateTextRequest {
  // All available features for sentiment, syntax, and semantic analysis.
  // Setting a feature to true enables that specific analysis for the input.
  message Features {
    // Extract syntax information.
    bool extract_syntax = 1;

    // Extract entities.
    bool extract_entities = 2;

    // Extract document-level sentiment.
    bool extract_document_sentiment = 3;
  }

  // The input document.
  Document document = 1;

  // The features to enable.
  Features features = 2;

  // The encoding type that the API uses to calculate offsets.
  EncodingType encoding_type = 3;
}

// Response message for text annotation.
message AnnotateTextResponse {
  // The sentences in the input document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_syntax].
  repeated Sentence sentences = 1;

  // The tokens in the input document, along with their syntactic
  // information. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_syntax].
  repeated Token tokens = 2;

  // The entities in the input document, along with their semantic
  // information. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_entities][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_entities].
  repeated Entity entities = 3;

  // The overall sentiment of the document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_document_sentiment].
  Sentiment document_sentiment = 4;

  // The language of the text. This is the language specified in the request
  // or, if none was specified, the automatically-detected language.
  string language = 5;
}

// The text encoding that the caller uses to process the output.
// Providing an `EncodingType` is recommended: the API returns beginning
// offsets for various outputs, such as tokens and mentions, and languages
// that natively use different text encodings may access offsets differently.
enum EncodingType {
  // No `EncodingType` was specified. Encoding-dependent information (such as
  // `begin_offset`) will be set at `-1`.
  NONE = 0;

  // Encoding-dependent information (such as `begin_offset`) is calculated
  // from the UTF-8 encoding of the input. C++ and Go are examples of
  // languages that use this encoding natively.
  UTF8 = 1;

  // Encoding-dependent information (such as `begin_offset`) is calculated
  // from the UTF-16 encoding of the input. Java and Javascript are examples
  // of languages that use this encoding natively.
  UTF16 = 2;

  // Encoding-dependent information (such as `begin_offset`) is calculated
  // from the UTF-32 encoding of the input. Python is an example of a language
  // that uses this encoding natively.
  UTF32 = 3;
}
|