// clusters.proto
// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
  14. syntax = "proto3";
  15. package google.cloud.dataproc.v1;
  16. import "google/api/annotations.proto";
  17. import "google/cloud/dataproc/v1/operations.proto";
  18. import "google/longrunning/operations.proto";
  19. import "google/protobuf/duration.proto";
  20. import "google/protobuf/field_mask.proto";
  21. import "google/protobuf/timestamp.proto";
  22. option java_multiple_files = true;
  23. option java_outer_classname = "ClustersProto";
  24. option java_package = "com.google.cloud.dataproc.v1";
  25. // The ClusterControllerService provides methods to manage clusters
  26. // of Google Compute Engine instances.
  27. service ClusterController {
  28. // Creates a cluster in a project.
  29. rpc CreateCluster(CreateClusterRequest) returns (google.longrunning.Operation) {
  30. option (google.api.http) = { post: "/v1/projects/{project_id}/regions/{region}/clusters" body: "cluster" };
  31. }
  32. // Updates a cluster in a project.
  33. rpc UpdateCluster(UpdateClusterRequest) returns (google.longrunning.Operation) {
  34. option (google.api.http) = { patch: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}" body: "cluster" };
  35. }
  36. // Deletes a cluster in a project.
  37. rpc DeleteCluster(DeleteClusterRequest) returns (google.longrunning.Operation) {
  38. option (google.api.http) = { delete: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}" };
  39. }
  40. // Gets the resource representation for a cluster in a project.
  41. rpc GetCluster(GetClusterRequest) returns (Cluster) {
  42. option (google.api.http) = { get: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}" };
  43. }
  44. // Lists all regions/{region}/clusters in a project.
  45. rpc ListClusters(ListClustersRequest) returns (ListClustersResponse) {
  46. option (google.api.http) = { get: "/v1/projects/{project_id}/regions/{region}/clusters" };
  47. }
  48. // Gets cluster diagnostic information.
  49. // After the operation completes, the Operation.response field
  50. // contains `DiagnoseClusterOutputLocation`.
  51. rpc DiagnoseCluster(DiagnoseClusterRequest) returns (google.longrunning.Operation) {
  52. option (google.api.http) = { post: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}:diagnose" body: "*" };
  53. }
  54. }
  55. // Describes the identifying information, config, and status of
  56. // a cluster of Google Compute Engine instances.
  57. message Cluster {
  58. // [Required] The Google Cloud Platform project ID that the cluster belongs to.
  59. string project_id = 1;
  60. // [Required] The cluster name. Cluster names within a project must be
  61. // unique. Names of deleted clusters can be reused.
  62. string cluster_name = 2;
  63. // [Required] The cluster config. Note that Cloud Dataproc may set
  64. // default values, and values may change when clusters are updated.
  65. ClusterConfig config = 3;
  66. // [Output-only] Cluster status.
  67. ClusterStatus status = 4;
  68. // [Output-only] The previous cluster status.
  69. repeated ClusterStatus status_history = 7;
  70. // [Output-only] A cluster UUID (Unique Universal Identifier). Cloud Dataproc
  71. // generates this value when it creates the cluster.
  72. string cluster_uuid = 6;
  73. }
  74. // The cluster config.
  75. message ClusterConfig {
  76. // [Optional] A Google Cloud Storage staging bucket used for sharing generated
  77. // SSH keys and config. If you do not specify a staging bucket, Cloud
  78. // Dataproc will determine an appropriate Cloud Storage location (US,
  79. // ASIA, or EU) for your cluster's staging bucket according to the Google
  80. // Compute Engine zone where your cluster is deployed, and then it will create
  81. // and manage this project-level, per-location bucket for you.
  82. string config_bucket = 1;
  83. // [Required] The shared Google Compute Engine config settings for
  84. // all instances in a cluster.
  85. GceClusterConfig gce_cluster_config = 8;
  86. // [Optional] The Google Compute Engine config settings for
  87. // the master instance in a cluster.
  88. InstanceGroupConfig master_config = 9;
  89. // [Optional] The Google Compute Engine config settings for
  90. // worker instances in a cluster.
  91. InstanceGroupConfig worker_config = 10;
  92. // [Optional] The Google Compute Engine config settings for
  93. // additional worker instances in a cluster.
  94. InstanceGroupConfig secondary_worker_config = 12;
  95. // [Optional] The config settings for software inside the cluster.
  96. SoftwareConfig software_config = 13;
  97. // [Optional] Commands to execute on each node after config is
  98. // completed. By default, executables are run on master and all worker nodes.
  99. // You can test a node's <code>role</code> metadata to run an executable on
  100. // a master or worker node, as shown below using `curl` (you can also use `wget`):
  101. //
  102. // ROLE=$(curl -H Metadata-Flavor:Google http://metadata/computeMetadata/v1/instance/attributes/dataproc-role)
  103. // if [[ "${ROLE}" == 'Master' ]]; then
  104. // ... master specific actions ...
  105. // else
  106. // ... worker specific actions ...
  107. // fi
  108. repeated NodeInitializationAction initialization_actions = 11;
  109. }
  110. // Common config settings for resources of Google Compute Engine cluster
  111. // instances, applicable to all instances in the cluster.
  112. message GceClusterConfig {
  113. // [Required] The zone where the Google Compute Engine cluster will be located.
  114. // Example: `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]`.
  115. string zone_uri = 1;
  116. // [Optional] The Google Compute Engine network to be used for machine
  117. // communications. Cannot be specified with subnetwork_uri. If neither
  118. // `network_uri` nor `subnetwork_uri` is specified, the "default" network of
  119. // the project is used, if it exists. Cannot be a "Custom Subnet Network" (see
  120. // [Using Subnetworks](/compute/docs/subnetworks) for more information).
  121. // Example: `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/global/default`.
  122. string network_uri = 2;
  123. // [Optional] The Google Compute Engine subnetwork to be used for machine
  124. // communications. Cannot be specified with network_uri.
  125. // Example: `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/us-east1/sub0`.
  126. string subnetwork_uri = 6;
  127. // [Optional] If true, all instances in the cluster will only have internal IP
  128. // addresses. By default, clusters are not restricted to internal IP addresses,
  129. // and will have ephemeral external IP addresses assigned to each instance.
  130. // This `internal_ip_only` restriction can only be enabled for subnetwork
  131. // enabled networks, and all off-cluster dependencies must be configured to be
  132. // accessible without external IP addresses.
  133. bool internal_ip_only = 7;
  134. // [Optional] The URIs of service account scopes to be included in Google
  135. // Compute Engine instances. The following base set of scopes is always
  136. // included:
  137. //
  138. // * https://www.googleapis.com/auth/cloud.useraccounts.readonly
  139. // * https://www.googleapis.com/auth/devstorage.read_write
  140. // * https://www.googleapis.com/auth/logging.write
  141. //
  142. // If no scopes are specified, the following defaults are also provided:
  143. //
  144. // * https://www.googleapis.com/auth/bigquery
  145. // * https://www.googleapis.com/auth/bigtable.admin.table
  146. // * https://www.googleapis.com/auth/bigtable.data
  147. // * https://www.googleapis.com/auth/devstorage.full_control
  148. repeated string service_account_scopes = 3;
  149. // The Google Compute Engine tags to add to all instances (see
  150. // [Labeling instances](/compute/docs/label-or-tag-resources#labeling_instances)).
  151. repeated string tags = 4;
  152. // The Google Compute Engine metadata entries to add to all instances (see
  153. // [Project and instance metadata](https://cloud.google.com/compute/docs/storing-retrieving-metadata#project_and_instance_metadata)).
  154. map<string, string> metadata = 5;
  155. }
  156. // [Optional] The config settings for Google Compute Engine resources in
  157. // an instance group, such as a master or worker group.
  158. message InstanceGroupConfig {
  159. // [Required] The number of VM instances in the instance group.
  160. // For master instance groups, must be set to 1.
  161. int32 num_instances = 1;
  162. // [Optional] The list of instance names. Cloud Dataproc derives the names from
  163. // `cluster_name`, `num_instances`, and the instance group if not set by user
  164. // (recommended practice is to let Cloud Dataproc derive the name).
  165. repeated string instance_names = 2;
  166. // [Output-only] The Google Compute Engine image resource used for cluster
  167. // instances. Inferred from `SoftwareConfig.image_version`.
  168. string image_uri = 3;
  169. // [Required] The Google Compute Engine machine type used for cluster instances.
  170. // Example: `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`.
  171. string machine_type_uri = 4;
  172. // [Optional] Disk option config settings.
  173. DiskConfig disk_config = 5;
  174. // [Optional] Specifies that this instance group contains preemptible instances.
  175. bool is_preemptible = 6;
  176. // [Output-only] The config for Google Compute Engine Instance Group
  177. // Manager that manages this group.
  178. // This is only used for preemptible instance groups.
  179. ManagedGroupConfig managed_group_config = 7;
  180. }
  181. // Specifies the resources used to actively manage an instance group.
  182. message ManagedGroupConfig {
  183. // [Output-only] The name of the Instance Template used for the Managed
  184. // Instance Group.
  185. string instance_template_name = 1;
  186. // [Output-only] The name of the Instance Group Manager for this group.
  187. string instance_group_manager_name = 2;
  188. }
  189. // Specifies the config of disk options for a group of VM instances.
  190. message DiskConfig {
  191. // [Optional] Size in GB of the boot disk (default is 500GB).
  192. int32 boot_disk_size_gb = 1;
  193. // [Optional] Number of attached SSDs, from 0 to 4 (default is 0).
  194. // If SSDs are not attached, the boot disk is used to store runtime logs and
  195. // [HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html) data.
  196. // If one or more SSDs are attached, this runtime bulk
  197. // data is spread across them, and the boot disk contains only basic
  198. // config and installed binaries.
  199. int32 num_local_ssds = 2;
  200. }
  201. // Specifies an executable to run on a fully configured node and a
  202. // timeout period for executable completion.
  203. message NodeInitializationAction {
  204. // [Required] Google Cloud Storage URI of executable file.
  205. string executable_file = 1;
  206. // [Optional] Amount of time executable has to complete. Default is
  207. // 10 minutes. Cluster creation fails with an explanatory error message (the
  208. // name of the executable that caused the error and the exceeded timeout
  209. // period) if the executable is not completed at end of the timeout period.
  210. google.protobuf.Duration execution_timeout = 2;
  211. }
  212. // The status of a cluster and its instances.
  213. message ClusterStatus {
  214. // The cluster state.
  215. enum State {
  216. // The cluster state is unknown.
  217. UNKNOWN = 0;
  218. // The cluster is being created and set up. It is not ready for use.
  219. CREATING = 1;
  220. // The cluster is currently running and healthy. It is ready for use.
  221. RUNNING = 2;
  222. // The cluster encountered an error. It is not ready for use.
  223. ERROR = 3;
  224. // The cluster is being deleted. It cannot be used.
  225. DELETING = 4;
  226. // The cluster is being updated. It continues to accept and process jobs.
  227. UPDATING = 5;
  228. }
  229. // [Output-only] The cluster's state.
  230. State state = 1;
  231. // [Output-only] Optional details of cluster's state.
  232. string detail = 2;
  233. // [Output-only] Time when this state was entered.
  234. google.protobuf.Timestamp state_start_time = 3;
  235. }
  236. // Specifies the selection and config of software inside the cluster.
  237. message SoftwareConfig {
  238. // [Optional] The version of software inside the cluster. It must match the
  239. // regular expression `[0-9]+\.[0-9]+`. If unspecified, it defaults to the
  240. // latest version (see [Cloud Dataproc Versioning](/dataproc/versioning)).
  241. string image_version = 1;
  242. // [Optional] The properties to set on daemon config files.
  243. //
  244. // Property keys are specified in `prefix:property` format, such as
  245. // `core:fs.defaultFS`. The following are supported prefixes
  246. // and their mappings:
  247. //
  248. // * core: `core-site.xml`
  249. // * hdfs: `hdfs-site.xml`
  250. // * mapred: `mapred-site.xml`
  251. // * yarn: `yarn-site.xml`
  252. // * hive: `hive-site.xml`
  253. // * pig: `pig.properties`
  254. // * spark: `spark-defaults.conf`
  255. map<string, string> properties = 2;
  256. }
  257. // A request to create a cluster.
  258. message CreateClusterRequest {
  259. // [Required] The ID of the Google Cloud Platform project that the cluster
  260. // belongs to.
  261. string project_id = 1;
  262. // [Required] The Cloud Dataproc region in which to handle the request.
  263. string region = 3;
  264. // [Required] The cluster to create.
  265. Cluster cluster = 2;
  266. }
  267. // A request to update a cluster.
  268. message UpdateClusterRequest {
  269. // [Required] The ID of the Google Cloud Platform project the
  270. // cluster belongs to.
  271. string project_id = 1;
  272. // [Required] The Cloud Dataproc region in which to handle the request.
  273. string region = 5;
  274. // [Required] The cluster name.
  275. string cluster_name = 2;
  276. // [Required] The changes to the cluster.
  277. Cluster cluster = 3;
  278. // [Required] Specifies the path, relative to <code>Cluster</code>, of
  279. // the field to update. For example, to change the number of workers
  280. // in a cluster to 5, the <code>update_mask</code> parameter would be
  281. // specified as <code>config.worker_config.num_instances</code>,
  282. // and the `PATCH` request body would specify the new value, as follows:
  283. //
  284. // {
  285. // "config":{
  286. // "workerConfig":{
  287. // "numInstances":"5"
  288. // }
  289. // }
  290. // }
  291. // Similarly, to change the number of preemptible workers in a cluster to 5, the
  292. // <code>update_mask</code> parameter would be <code>config.secondary_worker_config.num_instances</code>,
  293. // and the `PATCH` request body would be set as follows:
  294. //
  295. // {
  296. // "config":{
  297. // "secondaryWorkerConfig":{
  298. // "numInstances":"5"
  299. // }
  300. // }
  301. // }
  302. // <strong>Note:</strong> Currently, <code>config.worker_config.num_instances</code>
  303. // and <code>config.secondary_worker_config.num_instances</code> are the only
  304. // fields that can be updated.
  305. google.protobuf.FieldMask update_mask = 4;
  306. }
  307. // A request to delete a cluster.
  308. message DeleteClusterRequest {
  309. // [Required] The ID of the Google Cloud Platform project that the cluster
  310. // belongs to.
  311. string project_id = 1;
  312. // [Required] The Cloud Dataproc region in which to handle the request.
  313. string region = 3;
  314. // [Required] The cluster name.
  315. string cluster_name = 2;
  316. }
  317. // Request to get the resource representation for a cluster in a project.
  318. message GetClusterRequest {
  319. // [Required] The ID of the Google Cloud Platform project that the cluster
  320. // belongs to.
  321. string project_id = 1;
  322. // [Required] The Cloud Dataproc region in which to handle the request.
  323. string region = 3;
  324. // [Required] The cluster name.
  325. string cluster_name = 2;
  326. }
  327. // A request to list the clusters in a project.
  328. message ListClustersRequest {
  329. // [Required] The ID of the Google Cloud Platform project that the cluster
  330. // belongs to.
  331. string project_id = 1;
  332. // [Required] The Cloud Dataproc region in which to handle the request.
  333. string region = 4;
  334. // [Optional] The standard List page size.
  335. int32 page_size = 2;
  336. // [Optional] The standard List page token.
  337. string page_token = 3;
  338. }
  339. // The list of all clusters in a project.
  340. message ListClustersResponse {
  341. // [Output-only] The clusters in the project.
  342. repeated Cluster clusters = 1;
  343. // [Output-only] This token is included in the response if there are more
  344. // results to fetch. To fetch additional results, provide this value as the
  345. // `page_token` in a subsequent <code>ListClustersRequest</code>.
  346. string next_page_token = 2;
  347. }
  348. // A request to collect cluster diagnostic information.
  349. message DiagnoseClusterRequest {
  350. // [Required] The ID of the Google Cloud Platform project that the cluster
  351. // belongs to.
  352. string project_id = 1;
  353. // [Required] The Cloud Dataproc region in which to handle the request.
  354. string region = 3;
  355. // [Required] The cluster name.
  356. string cluster_name = 2;
  357. }
  358. // The location of diagnostic output.
  359. message DiagnoseClusterResults {
  360. // [Output-only] The Google Cloud Storage URI of the diagnostic output.
  361. // The output report is a plain text file with a summary of collected
  362. // diagnostics.
  363. string output_uri = 1;
  364. }