// File: pipelines.proto
  // Copyright 2016 Google Inc.
  //
  // Licensed under the Apache License, Version 2.0 (the "License");
  // you may not use this file except in compliance with the License.
  // You may obtain a copy of the License at
  //
  //     http://www.apache.org/licenses/LICENSE-2.0
  //
  // Unless required by applicable law or agreed to in writing, software
  // distributed under the License is distributed on an "AS IS" BASIS,
  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  // See the License for the specific language governing permissions and
  // limitations under the License.
  syntax = "proto3";

  package google.genomics.v1alpha2;

  import "google/api/annotations.proto";
  import "google/longrunning/operations.proto";
  import "google/protobuf/duration.proto";
  import "google/protobuf/empty.proto";
  import "google/protobuf/timestamp.proto";
  import "google/rpc/code.proto";

  // Code-generation options. The values are reproduced exactly as they
  // appeared in the original file; changing them alters generated class and
  // package names for existing users.
  option cc_enable_arenas = true;
  option java_multiple_files = true;
  option java_outer_classname = "PipelinesProto";
  option java_package = "com.google.genomics.v1a";
  // A service for running genomics pipelines.
  service PipelinesV1Alpha2 {
    // Creates a pipeline that can be run later. Create takes a Pipeline that
    // has all fields other than `pipelineId` populated, and then returns
    // the same pipeline with `pipelineId` populated. This id can be used
    // to run the pipeline.
    //
    // Caller must have WRITE permission to the project.
    rpc CreatePipeline(CreatePipelineRequest) returns (Pipeline) {
      option (google.api.http) = {
        post: "/v1alpha2/pipelines"
        body: "pipeline"
      };
    }

    // Runs a pipeline. If `pipelineId` is specified in the request, then
    // run a saved pipeline. If `ephemeralPipeline` is specified, then run
    // that pipeline once without saving a copy.
    //
    // The caller must have READ permission to the project where the pipeline
    // is stored and WRITE permission to the project where the pipeline will
    // be run, as VMs will be created and storage will be used.
    rpc RunPipeline(RunPipelineRequest)
        returns (google.longrunning.Operation) {
      option (google.api.http) = {
        post: "/v1alpha2/pipelines:run"
        body: "*"
      };
    }

    // Retrieves a pipeline based on ID.
    //
    // Caller must have READ permission to the project.
    rpc GetPipeline(GetPipelineRequest) returns (Pipeline) {
      option (google.api.http) = {
        get: "/v1alpha2/pipelines/{pipeline_id}"
      };
    }

    // Lists pipelines.
    //
    // Caller must have READ permission to the project.
    rpc ListPipelines(ListPipelinesRequest) returns (ListPipelinesResponse) {
      option (google.api.http) = {
        get: "/v1alpha2/pipelines"
      };
    }

    // Deletes a pipeline based on ID.
    //
    // Caller must have WRITE permission to the project.
    rpc DeletePipeline(DeletePipelineRequest)
        returns (google.protobuf.Empty) {
      option (google.api.http) = {
        delete: "/v1alpha2/pipelines/{pipeline_id}"
      };
    }

    // Gets controller configuration information. Should only be called
    // by VMs created by the Pipelines Service and not by end users.
    rpc GetControllerConfig(GetControllerConfigRequest)
        returns (ControllerConfig) {
      option (google.api.http) = {
        get: "/v1alpha2/pipelines:getControllerConfig"
      };
    }

    // Sets status of a given operation. All timestamps are sent on each
    // call, and the whole series of events is replaced, in case
    // intermediate calls are lost. Should only be called by VMs created
    // by the Pipelines Service and not by end users.
    rpc SetOperationStatus(SetOperationStatusRequest)
        returns (google.protobuf.Empty) {
      option (google.api.http) = {
        put: "/v1alpha2/pipelines:setOperationStatus"
        body: "*"
      };
    }
  }
  // Describes a Compute Engine resource that is being managed by a running
  // [pipeline][google.genomics.v1alpha2.Pipeline].
  message ComputeEngine {
    // The instance on which the operation is running.
    string instance_name = 1;

    // The availability zone in which the instance resides.
    string zone = 2;

    // The machine type of the instance.
    string machine_type = 3;

    // The names of the disks that were created for this pipeline.
    repeated string disk_names = 4;
  }
  // Runtime metadata that will be populated in the
  // [runtimeMetadata][google.genomics.v1.OperationMetadata.runtime_metadata]
  // field of the Operation associated with a RunPipeline execution.
  message RuntimeMetadata {
    // Execution information specific to Google Compute Engine.
    ComputeEngine compute_engine = 1;
  }
  // The pipeline object. Represents a transformation from a set of input
  // parameters to a set of output parameters. The transformation is defined
  // as a docker image and command to run within that image. Each pipeline
  // is run on a Google Compute Engine VM. A pipeline can be created with the
  // `create` method and then later run with the `run` method, or a pipeline
  // can be defined and run all at once with the `run` method.
  message Pipeline {
    // Required. The project in which to create the pipeline. The caller must
    // have WRITE access.
    string project_id = 1;

    // Required. A user specified pipeline name that does not have to be
    // unique. This name can be used for filtering Pipelines in ListPipelines.
    string name = 2;

    // User-specified description.
    string description = 3;

    // Input parameters of the pipeline.
    repeated PipelineParameter input_parameters = 8;

    // Output parameters of the pipeline.
    repeated PipelineParameter output_parameters = 9;

    // Required. The executor indicates in which environment the pipeline
    // runs.
    oneof executor {
      // Specifies the docker run information.
      DockerExecutor docker = 5;
    }

    // Required. Specifies resource requirements for the pipeline run.
    // Required fields:
    //
    // *
    // [minimumCpuCores][google.genomics.v1alpha2.PipelineResources.minimum_cpu_cores]
    //
    // *
    // [minimumRamGb][google.genomics.v1alpha2.PipelineResources.minimum_ram_gb]
    PipelineResources resources = 6;

    // Unique pipeline id that is generated by the service when CreatePipeline
    // is called. Cannot be specified in the Pipeline used in the
    // CreatePipelineRequest, and will be populated in the response to
    // CreatePipeline and all subsequent Get and List calls. Indicates that
    // the service has registered this pipeline.
    string pipeline_id = 7;
  }
  // The request to create a pipeline. The pipeline field here should not have
  // `pipelineId` populated, as that will be populated by the server.
  message CreatePipelineRequest {
    // The pipeline to create. Should not have `pipelineId` populated.
    Pipeline pipeline = 1;
  }
  // The pipeline run arguments.
  message RunPipelineArgs {
    // Required. The project in which to run the pipeline. The caller must
    // have WRITER access to all Google Cloud services and resources (e.g.
    // Google Compute Engine) that will be used.
    string project_id = 1;

    // Pipeline input arguments; keys are defined in the pipeline
    // documentation. All input parameters that do not have default values
    // must be specified. If parameters with defaults are specified here, the
    // defaults will be overridden.
    map<string, string> inputs = 2;

    // Pipeline output arguments; keys are defined in the pipeline
    // documentation. All output parameters without default values must be
    // specified. If parameters with defaults are specified here, the defaults
    // will be overridden.
    map<string, string> outputs = 3;

    // The Google Cloud Service Account that will be used to access data and
    // services. By default, the compute service account associated with
    // `projectId` is used.
    ServiceAccount service_account = 4;

    // Client-specified pipeline operation identifier.
    string client_id = 5;

    // Specifies resource requirements/overrides for the pipeline run.
    PipelineResources resources = 6;

    // Required. Logging options. Used by the service to communicate results
    // to the user.
    LoggingOptions logging = 7;

    // How long to keep the VM up after a failure (for example docker command
    // failed, copying input or output files failed, etc). While the VM is up,
    // one can ssh into the VM to debug. Default is 0; maximum allowed value
    // is 1 day.
    google.protobuf.Duration keep_vm_alive_on_failure_duration = 8;
  }
  // The request to run a pipeline. If `pipelineId` is specified, it
  // refers to a saved pipeline created with CreatePipeline and set as
  // the `pipelineId` of the returned Pipeline object. If
  // `ephemeralPipeline` is specified, that pipeline is run once
  // with the given args and not saved. It is an error to specify both
  // `pipelineId` and `ephemeralPipeline`. `pipelineArgs`
  // must be specified.
  message RunPipelineRequest {
    // Selects which pipeline to run; the two members are mutually exclusive.
    oneof pipeline {
      // The already created pipeline to run.
      string pipeline_id = 1;

      // A new pipeline object to run once and then delete.
      Pipeline ephemeral_pipeline = 2;
    }

    // The arguments to use when running this pipeline.
    RunPipelineArgs pipeline_args = 3;
  }
  // A request to get a saved pipeline by id.
  message GetPipelineRequest {
    // Caller must have READ access to the project in which this pipeline
    // is defined.
    string pipeline_id = 1;
  }
  // A request to list pipelines in a given project. Pipelines can be
  // filtered by name using `namePrefix`: all pipelines with names that
  // begin with `namePrefix` will be returned. Uses standard pagination:
  // `pageSize` indicates how many pipelines to return, and
  // `pageToken` comes from a previous ListPipelinesResponse to
  // indicate offset.
  message ListPipelinesRequest {
    // Required. The name of the project to search for pipelines. Caller
    // must have READ access to this project.
    string project_id = 1;

    // Pipelines with names that match this prefix should be
    // returned. If unspecified, all pipelines in the project, up to
    // `pageSize`, will be returned.
    string name_prefix = 2;

    // Number of pipelines to return at once. Defaults to 256, and max
    // is 2048.
    int32 page_size = 3;

    // Token to use to indicate where to start getting results.
    // If unspecified, returns the first page of results.
    string page_token = 4;
  }
  // The response of ListPipelines. Contains at most `pageSize`
  // pipelines. If it contains `pageSize` pipelines, and more pipelines
  // exist, then `nextPageToken` will be populated and should be
  // used as the `pageToken` argument to a subsequent ListPipelines
  // request.
  message ListPipelinesResponse {
    // The matched pipelines.
    repeated Pipeline pipelines = 1;

    // The token to use to get the next page of results.
    string next_page_token = 2;
  }
  // The request to delete a saved pipeline by ID.
  message DeletePipelineRequest {
    // Caller must have WRITE access to the project in which this pipeline
    // is defined.
    string pipeline_id = 1;
  }
  // Request to get controller configuration. Should only be used
  // by VMs created by the Pipelines Service and not by end users.
  message GetControllerConfigRequest {
    // The operation to retrieve controller configuration for.
    string operation_id = 1;

    // NOTE(review): undocumented in the original. Presumably a token the
    // service uses to validate that the caller is the VM it created for this
    // operation -- confirm with the service owners.
    uint64 validation_token = 2;
  }
  // Stores the information that the controller will fetch from the
  // server in order to run. Should only be used by VMs created by the
  // Pipelines Service and not by end users.
  //
  // NOTE(review): none of the fields below were documented in the original.
  // The per-field notes are inferred from names only and must be confirmed
  // before being relied on.
  message ControllerConfig {
    // Wrapper around a list of strings, needed because a map value cannot be
    // a repeated field directly.
    message RepeatedString {
      // The wrapped string values.
      repeated string values = 1;
    }

    // Presumably the docker image to run -- TODO confirm.
    string image = 1;

    // Presumably the command to run inside the image -- TODO confirm.
    string cmd = 2;

    // Presumably the Google Cloud Storage path for logs -- TODO confirm.
    string gcs_log_path = 3;

    // Presumably the Compute Engine machine type of the VM -- TODO confirm.
    string machine_type = 4;

    // String-to-string map; key and value semantics are not documented.
    map<string, string> vars = 5;

    // String-to-string map; key and value semantics are not documented.
    map<string, string> disks = 6;

    // Map from a string key to a list of strings; semantics are not
    // documented (presumably Google Cloud Storage copy sources).
    map<string, RepeatedString> gcs_sources = 7;

    // Map from a string key to a list of strings; semantics are not
    // documented (presumably Google Cloud Storage copy destinations).
    map<string, RepeatedString> gcs_sinks = 8;
  }
  // Stores the list of events and times they occurred for major events in job
  // execution.
  message TimestampEvent {
    // String indicating the type of event.
    string description = 1;

    // The time this event occurred.
    google.protobuf.Timestamp timestamp = 2;
  }
  // Request to set operation status. Should only be used by VMs
  // created by the Pipelines Service and not by end users.
  message SetOperationStatusRequest {
    // NOTE(review): undocumented in the original; presumably identifies the
    // operation whose status is being set -- confirm.
    string operation_id = 1;

    // The series of timestamped events for the operation. Per the
    // SetOperationStatus RPC description, all timestamps are sent on each
    // call and the whole series of events is replaced.
    repeated TimestampEvent timestamp_events = 2;

    // NOTE(review): undocumented in the original; presumably the status code
    // reported for the operation -- confirm.
    google.rpc.Code error_code = 3;

    // NOTE(review): undocumented in the original; presumably a message
    // accompanying `error_code` -- confirm.
    string error_message = 4;

    // NOTE(review): undocumented in the original; presumably a token the
    // service uses to validate the calling VM -- confirm.
    uint64 validation_token = 5;
  }
  // A Google Cloud Service Account.
  message ServiceAccount {
    // Email address of the service account. Defaults to `default`,
    // which uses the compute service account associated with the project.
    string email = 1;

    // List of scopes to be enabled for this service account on the
    // pipeline virtual machine.
    // The following scopes are automatically included:
    //
    // * https://www.googleapis.com/auth/genomics
    // * https://www.googleapis.com/auth/compute
    // * https://www.googleapis.com/auth/devstorage.full_control
    repeated string scopes = 2;
  }
  // The logging options for the pipeline run.
  message LoggingOptions {
    // The location in Google Cloud Storage to which the pipeline logs
    // will be copied. Can be specified as a fully qualified directory
    // path, in which case logs will be output with a unique identifier
    // as the filename in that directory, or as a fully specified path,
    // which must end in `.log`, in which case that path will be
    // used, and the user must ensure that logs are not
    // overwritten. Stdout and stderr logs from the run are also
    // generated and output as `-stdout.log` and `-stderr.log`.
    string gcs_path = 1;
  }
  // The system resources for the pipeline run.
  message PipelineResources {
    // A Google Compute Engine disk resource specification.
    message Disk {
      // The types of disks that may be attached to VMs.
      enum Type {
        // Default disk type. Use one of the other options below.
        TYPE_UNSPECIFIED = 0;

        // Specifies a Google Compute Engine persistent hard disk. See
        // https://cloud.google.com/compute/docs/disks/#pdspecs for details.
        PERSISTENT_HDD = 1;

        // Specifies a Google Compute Engine persistent solid-state disk. See
        // https://cloud.google.com/compute/docs/disks/#pdspecs for details.
        PERSISTENT_SSD = 2;

        // Specifies a Google Compute Engine local SSD. See
        // https://cloud.google.com/compute/docs/disks/local-ssd for details.
        LOCAL_SSD = 3;
      }

      // Required. The name of the disk that can be used in the pipeline
      // parameters. Must be 1 - 63 characters.
      // The name "boot" is reserved for system use.
      string name = 1;

      // Required. The type of the disk to create.
      Type type = 2;

      // The size of the disk. Defaults to 500 (GB).
      // This field is not applicable for local SSD.
      int32 size_gb = 3;

      // The full or partial URL of the persistent disk to attach. See
      // https://cloud.google.com/compute/docs/reference/latest/instances#resource
      // and
      // https://cloud.google.com/compute/docs/disks/persistent-disks#snapshots
      // for more details.
      string source = 4;

      // Specifies whether or not to delete the disk when the pipeline
      // completes. This field is applicable only for newly created disks. See
      // https://cloud.google.com/compute/docs/reference/latest/instances#resource
      // for more details.
      // By default, `autoDelete` is `false`. `autoDelete` will be enabled if
      // set to `true` at create time or run time.
      bool auto_delete = 6;

      // Specifies how a source-based persistent disk will be mounted. See
      // https://cloud.google.com/compute/docs/disks/persistent-disks#use_multi_instances
      // for more details.
      // Can only be set at create time.
      bool read_only = 7;

      // Required at create time and cannot be overridden at run time.
      // Specifies the path in the docker container where files on
      // this disk should be located. For example, if `mountPoint`
      // is `/mnt/disk`, and the parameter has `localPath`
      // `inputs/file.txt`, the docker container can access the data at
      // `/mnt/disk/inputs/file.txt`.
      string mount_point = 8;
    }

    // The minimum number of cores to use. Defaults to 1.
    int32 minimum_cpu_cores = 1;

    // At create time means that preemptible machines may be
    // used for the run. At run time, means they should be used. Cannot
    // be true at run time if false at create time.
    // Defaults to `false`.
    bool preemptible = 2;

    // The minimum amount of RAM to use. Defaults to 3.75 (GB).
    double minimum_ram_gb = 3;

    // Disks to attach.
    repeated Disk disks = 4;

    // List of Google Compute Engine availability zones to which resource
    // creation will be restricted. If empty, any zone may be chosen.
    repeated string zones = 5;

    // The size of the boot disk. Defaults to 10 (GB).
    int32 boot_disk_size_gb = 6;
  }
  // Parameters facilitate setting and delivering data into the
  // pipeline's execution environment. They are defined at create time,
  // with optional defaults, and can be overridden at run time.
  //
  // If `localCopy` is unset, then the parameter specifies a string that
  // is passed as-is into the pipeline, as the value of the environment
  // variable with the given name. A default value can be optionally
  // specified at create time. The default can be overridden at run time
  // using the inputs map. If no default is given, a value must be
  // supplied at runtime.
  //
  // If `localCopy` is defined, then the parameter specifies a data
  // source or sink, both in Google Cloud Storage and on the Docker container
  // where the pipeline computation is run. The [service account associated
  // with the Pipeline][google.genomics.v1alpha2.RunPipelineArgs.service_account]
  // (by default the project's Compute Engine service account) must have
  // access to the Google Cloud Storage paths.
  //
  // At run time, the Google Cloud Storage paths can be overridden if a
  // default was provided at create time, or must be set otherwise. The
  // pipeline runner should add a key/value pair to either the inputs or
  // outputs map. The indicated data copies will be carried out before/after
  // pipeline execution, just as if the corresponding arguments were provided
  // to `gsutil cp`.
  //
  // For example: Given the following `PipelineParameter`, specified
  // in the `inputParameters` list:
  //
  // ```
  // {name: "input_file", localCopy: {path: "file.txt", disk: "pd1"}}
  // ```
  //
  // where `disk` is defined in the `PipelineResources` object as:
  //
  // ```
  // {name: "pd1", mountPoint: "/mnt/disk/"}
  // ```
  //
  // We create a disk named `pd1`, mount it on the host VM, and map
  // `/mnt/pd1` to `/mnt/disk` in the docker container. At
  // runtime, an entry for `input_file` would be required in the inputs
  // map, such as:
  //
  // ```
  // inputs["input_file"] = "gs://my-bucket/bar.txt"
  // ```
  //
  // This would generate the following gsutil call:
  //
  // ```
  // gsutil cp gs://my-bucket/bar.txt /mnt/pd1/file.txt
  // ```
  //
  // The file `/mnt/pd1/file.txt` maps to `/mnt/disk/file.txt` in the
  // Docker container. Acceptable paths are:
  //
  // <table>
  // <thead>
  // <tr><th>Google Cloud storage path</th><th>Local path</th></tr>
  // </thead>
  // <tbody>
  // <tr><td>file</td><td>file</td></tr>
  // <tr><td>glob</td><td>directory</td></tr>
  // </tbody>
  // </table>
  //
  // For outputs, the direction of the copy is reversed:
  //
  // ```
  // gsutil cp /mnt/disk/file.txt gs://my-bucket/bar.txt
  // ```
  //
  // Acceptable paths are:
  //
  // <table>
  // <thead>
  // <tr><th>Local path</th><th>Google Cloud Storage path</th></tr>
  // </thead>
  // <tbody>
  // <tr><td>file</td><td>file</td></tr>
  // <tr>
  // <td>file</td>
  // <td>directory - directory must already exist</td>
  // </tr>
  // <tr>
  // <td>glob</td>
  // <td>directory - directory will be created if it doesn't exist</td></tr>
  // </tbody>
  // </table>
  //
  // One restriction, due to docker limitations, is that for outputs that are
  // found on the boot disk, the local path cannot be a glob and must be a
  // file.
  message PipelineParameter {
    // LocalCopy defines how a remote file should be copied to and from the
    // VM.
    message LocalCopy {
      // Required. The path within the user's docker container where
      // this input should be localized to and from, relative to the specified
      // disk's mount point. For example: `file.txt`.
      string path = 1;

      // Required. The name of the disk where this parameter is
      // located. Can be the name of one of the disks specified in the
      // Resources field, or "boot", which represents the Docker
      // instance's boot disk and has a mount point of `/`.
      string disk = 2;
    }

    // Required. Name of the parameter - the pipeline runner uses this string
    // as the key to the input and output maps in RunPipeline.
    string name = 1;

    // Human-readable description.
    string description = 2;

    // The default value for this parameter. Can be overridden at runtime.
    // If `localCopy` is present, then this must be a Google Cloud Storage
    // path beginning with `gs://`.
    string default_value = 5;

    // If present, this parameter is marked for copying to and from the VM.
    // `LocalCopy` indicates where on the VM the file should be. The value
    // given to this parameter (either at runtime or using `defaultValue`)
    // must be the remote path where the file should be.
    LocalCopy local_copy = 6;
  }
  // The Docker executor specification.
  message DockerExecutor {
    // Required. Image name from either Docker Hub or Google Container
    // Repository. Users that run pipelines must have READ access to the
    // image.
    string image_name = 1;

    // Required. The command string to run. Parameters that do not have
    // `localCopy` specified should be used as environment variables, while
    // those that do can be accessed at the defined paths.
    string cmd = 2;
  }