references.proto 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281
  1. // Copyright 2016 Google Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  syntax = "proto3";

  package google.genomics.v1;

  // Supplies the `google.api.http` option that the service methods in this
  // file use to declare their HTTP bindings.
  import "google/api/annotations.proto";

  // C++ code generation: enable arena allocation for the generated messages.
  option cc_enable_arenas = true;

  // Java code generation: one file per top-level type, with the outer class
  // named `ReferencesProto`, all in the `com.google.genomics.v1` package.
  option java_multiple_files = true;
  option java_outer_classname = "ReferencesProto";
  option java_package = "com.google.genomics.v1";
  // Exposes search, get, and list operations over reference sets, references,
  // and the bases of a reference.
  service ReferenceServiceV1 {
    // Searches for reference sets which match the given criteria.
    //
    // See [Fundamentals of Google
    // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
    // for the definitions of references and other genomics resources.
    //
    // Implements
    // [GlobalAllianceApi.searchReferenceSets](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L71).
    rpc SearchReferenceSets(SearchReferenceSetsRequest)
        returns (SearchReferenceSetsResponse) {
      option (google.api.http) = {
        post: "/v1/referencesets/search"
        body: "*"
      };
    }

    // Gets a reference set.
    //
    // See [Fundamentals of Google
    // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
    // for the definitions of references and other genomics resources.
    //
    // Implements
    // [GlobalAllianceApi.getReferenceSet](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L83).
    rpc GetReferenceSet(GetReferenceSetRequest) returns (ReferenceSet) {
      option (google.api.http) = {
        get: "/v1/referencesets/{reference_set_id}"
      };
    }

    // Searches for references which match the given criteria.
    //
    // See [Fundamentals of Google
    // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
    // for the definitions of references and other genomics resources.
    //
    // Implements
    // [GlobalAllianceApi.searchReferences](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L146).
    rpc SearchReferences(SearchReferencesRequest)
        returns (SearchReferencesResponse) {
      option (google.api.http) = {
        post: "/v1/references/search"
        body: "*"
      };
    }

    // Gets a reference.
    //
    // See [Fundamentals of Google
    // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
    // for the definitions of references and other genomics resources.
    //
    // Implements
    // [GlobalAllianceApi.getReference](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L158).
    rpc GetReference(GetReferenceRequest) returns (Reference) {
      option (google.api.http) = {
        get: "/v1/references/{reference_id}"
      };
    }

    // Lists the bases in a reference, optionally restricted to a range.
    //
    // See [Fundamentals of Google
    // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
    // for the definitions of references and other genomics resources.
    //
    // Implements
    // [GlobalAllianceApi.getReferenceBases](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L221).
    rpc ListBases(ListBasesRequest) returns (ListBasesResponse) {
      option (google.api.http) = {
        get: "/v1/references/{reference_id}/bases"
      };
    }
  }
  // A canonical assembled DNA sequence that is intended to serve as a
  // reference coordinate space for other genomic annotations. For instance, a
  // single reference might represent human chromosome 1 or mitochondrial DNA.
  // Every reference belongs to one or more reference sets.
  //
  // For more genomics resource definitions, see [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  message Reference {
    // Server-generated ID of this reference; unique across all references.
    string id = 1;

    // Length of this reference's sequence.
    int64 length = 2;

    // MD5 of the upper-case sequence with all whitespace characters excluded
    // (equivalent to SQ:M5 in SAM). The value is represented in lower-case
    // hexadecimal format.
    string md5checksum = 3;

    // Name of this reference, for example `22`.
    string name = 4;

    // URI from which the sequence was obtained. Typically specifies a FASTA
    // format file.
    string source_uri = 5;

    // All known corresponding accession IDs in INSDC (GenBank/ENA/DDBJ),
    // ideally with a version number, for example `GCF_000001405.26`.
    repeated string source_accessions = 6;

    // ID from http://www.ncbi.nlm.nih.gov/taxonomy. For example, 9606 for
    // human.
    int32 ncbi_taxon_id = 7;
  }
  // A set of references which typically comprise a reference assembly for a
  // species, such as `GRCh38`, which is representative of the human genome. A
  // reference set defines a common coordinate space for comparing
  // reference-aligned experimental data, and contains 1 or more references.
  //
  // For more genomics resource definitions, see [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  message ReferenceSet {
    // Server-generated ID of this reference set; unique across all reference
    // sets.
    string id = 1;

    // IDs of the reference objects that are part of this set.
    // `Reference.md5checksum` must be unique within this set.
    repeated string reference_ids = 2;

    // Order-independent MD5 checksum which identifies this reference set. It
    // is computed by taking the lower-case hexadecimal string
    // `reference.md5checksum` of every reference in this set, sorting those
    // strings in ascending lexicographic order, concatenating them, and taking
    // the MD5 of that value. The resulting value is represented in lower-case
    // hexadecimal format.
    string md5checksum = 3;

    // ID from http://www.ncbi.nlm.nih.gov/taxonomy (for example, 9606 for
    // human) indicating the species which this reference set is intended to
    // model. Contained references may specify a different `ncbiTaxonId`,
    // because assemblies may contain reference sequences which do not belong
    // to the modeled species, for example EBV in a human reference genome.
    int32 ncbi_taxon_id = 4;

    // Free-text description of this reference set.
    string description = 5;

    // Public ID of this reference set, such as `GRCh37`.
    string assembly_id = 6;

    // URI from which the references were obtained.
    string source_uri = 7;

    // All known corresponding accession IDs in INSDC (GenBank/ENA/DDBJ),
    // ideally with a version number, for example `NC_000001.11`.
    repeated string source_accessions = 8;
  }
  // Request message for the `SearchReferenceSets` method.
  message SearchReferenceSetsRequest {
    // If present, return reference sets for which the
    // [md5checksum][google.genomics.v1.ReferenceSet.md5checksum] matches
    // exactly.
    repeated string md5checksums = 1;

    // If present, return reference sets for which a prefix of any of
    // [sourceAccessions][google.genomics.v1.ReferenceSet.source_accessions]
    // matches any of these strings. Accession numbers typically have a main
    // number and a version, for example `NC_000001.11`.
    repeated string accessions = 2;

    // If present, return reference sets for which a substring of their
    // `assemblyId` matches this string (case insensitive).
    string assembly_id = 3;

    // Continuation token used to page through large result sets. To get the
    // next page of results, set this parameter to the value of
    // `nextPageToken` from the previous response.
    string page_token = 4;

    // Maximum number of results to return in a single page. If unspecified,
    // defaults to 1024. The maximum value is 4096.
    int32 page_size = 5;
  }
  // Response message for the `SearchReferenceSets` method.
  message SearchReferenceSetsResponse {
    // The matching reference sets.
    repeated ReferenceSet reference_sets = 1;

    // Continuation token used to page through large result sets. Provide this
    // value in a subsequent request to return the next page of results. This
    // field will be empty if there aren't any additional results.
    string next_page_token = 2;
  }
  // Request message for the `GetReferenceSet` method.
  message GetReferenceSetRequest {
    // ID of the reference set.
    string reference_set_id = 1;
  }
  // Request message for the `SearchReferences` method.
  message SearchReferencesRequest {
    // If present, return references for which the
    // [md5checksum][google.genomics.v1.Reference.md5checksum] matches exactly.
    repeated string md5checksums = 1;

    // If present, return references for which a prefix of any of
    // [sourceAccessions][google.genomics.v1.Reference.source_accessions]
    // matches any of these strings. Accession numbers typically have a main
    // number and a version, for example `GCF_000001405.26`.
    repeated string accessions = 2;

    // If present, return only references which belong to this reference set.
    string reference_set_id = 3;

    // Continuation token used to page through large result sets. To get the
    // next page of results, set this parameter to the value of
    // `nextPageToken` from the previous response.
    string page_token = 4;

    // Maximum number of results to return in a single page. If unspecified,
    // defaults to 1024. The maximum value is 4096.
    int32 page_size = 5;
  }
  // Response message for the `SearchReferences` method.
  message SearchReferencesResponse {
    // The matching references.
    repeated Reference references = 1;

    // Continuation token used to page through large result sets. Provide this
    // value in a subsequent request to return the next page of results. This
    // field will be empty if there aren't any additional results.
    string next_page_token = 2;
  }
  // Request message for the `GetReference` method.
  message GetReferenceRequest {
    // ID of the reference.
    string reference_id = 1;
  }
  // Request message for the `ListBases` method.
  message ListBasesRequest {
    // ID of the reference.
    string reference_id = 1;

    // Start position (0-based) of this query. Defaults to 0.
    int64 start = 2;

    // End position (0-based, exclusive) of this query. Defaults to the length
    // of this reference.
    int64 end = 3;

    // Continuation token used to page through large result sets. To get the
    // next page of results, set this parameter to the value of
    // `nextPageToken` from the previous response.
    string page_token = 4;

    // Maximum number of bases to return in a single page. If unspecified,
    // defaults to 200Kbp (kilo base pairs). The maximum value is 10Mbp (mega
    // base pairs).
    int32 page_size = 5;
  }
  // Response message for the `ListBases` method.
  message ListBasesResponse {
    // Offset position (0-based) of the given `sequence` from the start of this
    // `Reference`. This value will differ for each page in a paginated
    // request.
    int64 offset = 1;

    // A substring of the bases that make up this reference.
    string sequence = 2;

    // Continuation token used to page through large result sets. Provide this
    // value in a subsequent request to return the next page of results. This
    // field will be empty if there aren't any additional results.
    string next_page_token = 3;
  }