jobs.proto

// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataproc.v1;

import "google/api/annotations.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";

option java_multiple_files = true;
option java_outer_classname = "JobsProto";
option java_package = "com.google.cloud.dataproc.v1";

// The JobController provides methods to manage jobs.
service JobController {
  // Submits a job to a cluster.
  rpc SubmitJob(SubmitJobRequest) returns (Job) {
    option (google.api.http) = { post: "/v1/projects/{project_id}/regions/{region}/jobs:submit" body: "*" };
  }

  // Gets the resource representation for a job in a project.
  rpc GetJob(GetJobRequest) returns (Job) {
    option (google.api.http) = { get: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}" };
  }

  // Lists regions/{region}/jobs in a project.
  rpc ListJobs(ListJobsRequest) returns (ListJobsResponse) {
    option (google.api.http) = { get: "/v1/projects/{project_id}/regions/{region}/jobs" };
  }

  // Starts a job cancellation request. To access the job resource
  // after cancellation, call
  // [regions/{region}/jobs.list](/dataproc/reference/rest/v1/projects.regions.jobs/list) or
  // [regions/{region}/jobs.get](/dataproc/reference/rest/v1/projects.regions.jobs/get).
  rpc CancelJob(CancelJobRequest) returns (Job) {
    option (google.api.http) = { post: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}:cancel" body: "*" };
  }

  // Deletes the job from the project. If the job is active, the delete fails,
  // and the response returns `FAILED_PRECONDITION`.
  rpc DeleteJob(DeleteJobRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = { delete: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}" };
  }
}

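// Example (illustrative): the HTTP bindings above map SubmitJobRequest onto a
// REST call. Assuming a hypothetical project "my-project", region "global",
// cluster "my-cluster", and a jar in "my-bucket", a SubmitJob request might
// look like:
//
//     POST /v1/projects/my-project/regions/global/jobs:submit
//     {
//       "job": {
//         "placement": { "clusterName": "my-cluster" },
//         "hadoopJob": {
//           "mainJarFileUri": "gs://my-bucket/wordcount.jar",
//           "args": ["gs://my-bucket/input/", "gs://my-bucket/output/"]
//         }
//       }
//     }
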
// The runtime logging config of the job.
message LoggingConfig {
  // The Log4j level for job execution. When running an
  // [Apache Hive](http://hive.apache.org/) job, Cloud
  // Dataproc configures the Hive client to an equivalent verbosity level.
  enum Level {
    // Level is unspecified. Use default level for log4j.
    LEVEL_UNSPECIFIED = 0;
    // Use ALL level for log4j.
    ALL = 1;
    // Use TRACE level for log4j.
    TRACE = 2;
    // Use DEBUG level for log4j.
    DEBUG = 3;
    // Use INFO level for log4j.
    INFO = 4;
    // Use WARN level for log4j.
    WARN = 5;
    // Use ERROR level for log4j.
    ERROR = 6;
    // Use FATAL level for log4j.
    FATAL = 7;
    // Turn off log4j.
    OFF = 8;
  }
  // The per-package log levels for the driver. This may include
  // "root" package name to configure rootLogger.
  // Examples:
  //   'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
  map<string, Level> driver_log_levels = 2;
}

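// Example (illustrative): a LoggingConfig that keeps the root logger at INFO
// and raises a hypothetical user package to DEBUG might appear in a request as:
//
//     "loggingConfig": {
//       "driverLogLevels": {
//         "root": "INFO",
//         "com.example": "DEBUG"
//       }
//     }
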
// A Cloud Dataproc job for running
// [Apache Hadoop MapReduce](https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html)
// jobs on [Apache Hadoop YARN](https://hadoop.apache.org/docs/r2.7.1/hadoop-yarn/hadoop-yarn-site/YARN.html).
message HadoopJob {
  // [Required] Indicates the location of the driver's main class. Specify
  // either the jar file that contains the main class or the main class name.
  // To specify both, add the jar file to `jar_file_uris`, and then specify
  // the main class name in this property.
  oneof driver {
    // The HCFS URI of the jar file containing the main class.
    // Examples:
    //   'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar'
    //   'hdfs:/tmp/test-samples/custom-wordcount.jar'
    //   'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar'
    string main_jar_file_uri = 1;
    // The name of the driver's main class. The jar file containing the class
    // must be in the default CLASSPATH or specified in `jar_file_uris`.
    string main_class = 2;
  }
  // [Optional] The arguments to pass to the driver. Do not
  // include arguments, such as `-libjars` or `-Dfoo=bar`, that can be set as job
  // properties, since a collision may occur that causes an incorrect job
  // submission.
  repeated string args = 3;
  // [Optional] Jar file URIs to add to the CLASSPATHs of the
  // Hadoop driver and tasks.
  repeated string jar_file_uris = 4;
  // [Optional] HCFS (Hadoop Compatible Filesystem) URIs of files to be copied
  // to the working directory of Hadoop drivers and distributed tasks. Useful
  // for naively parallel tasks.
  repeated string file_uris = 5;
  // [Optional] HCFS URIs of archives to be extracted in the working directory of
  // Hadoop drivers and tasks. Supported file types:
  // .jar, .tar, .tar.gz, .tgz, or .zip.
  repeated string archive_uris = 6;
  // [Optional] A mapping of property names to values, used to configure Hadoop.
  // Properties that conflict with values set by the Cloud Dataproc API may be
  // overwritten. Can include properties set in /etc/hadoop/conf/*-site and
  // classes in user code.
  map<string, string> properties = 7;
  // [Optional] The runtime log config for job execution.
  LoggingConfig logging_config = 8;
}

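// Example (illustrative): a hadoopJob section that uses the other arm of the
// `driver` oneof (a main class plus `jar_file_uris`, rather than
// `main_jar_file_uri`); the class, bucket, and property value are hypothetical:
//
//     "hadoopJob": {
//       "mainClass": "org.example.WordCount",
//       "jarFileUris": ["gs://my-bucket/wordcount.jar"],
//       "args": ["gs://my-bucket/input/", "gs://my-bucket/output/"],
//       "properties": { "mapreduce.job.reduces": "4" }
//     }
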
// A Cloud Dataproc job for running [Apache Spark](http://spark.apache.org/)
// applications on YARN.
message SparkJob {
  // [Required] The specification of the main method to call to drive the job.
  // Specify either the jar file that contains the main class or the main class
  // name. To pass both a main jar and a main class in that jar, add the jar to
  // `jar_file_uris`, and then specify the main class name in `main_class`.
  oneof driver {
    // The HCFS URI of the jar file that contains the main class.
    string main_jar_file_uri = 1;
    // The name of the driver's main class. The jar file that contains the class
    // must be in the default CLASSPATH or specified in `jar_file_uris`.
    string main_class = 2;
  }
  // [Optional] The arguments to pass to the driver. Do not include arguments,
  // such as `--conf`, that can be set as job properties, since a collision may
  // occur that causes an incorrect job submission.
  repeated string args = 3;
  // [Optional] HCFS URIs of jar files to add to the CLASSPATHs of the
  // Spark driver and tasks.
  repeated string jar_file_uris = 4;
  // [Optional] HCFS URIs of files to be copied to the working directory of
  // Spark drivers and distributed tasks. Useful for naively parallel tasks.
  repeated string file_uris = 5;
  // [Optional] HCFS URIs of archives to be extracted in the working directory
  // of Spark drivers and tasks. Supported file types:
  // .jar, .tar, .tar.gz, .tgz, and .zip.
  repeated string archive_uris = 6;
  // [Optional] A mapping of property names to values, used to configure Spark.
  // Properties that conflict with values set by the Cloud Dataproc API may be
  // overwritten. Can include properties set in
  // /etc/spark/conf/spark-defaults.conf and classes in user code.
  map<string, string> properties = 7;
  // [Optional] The runtime log config for job execution.
  LoggingConfig logging_config = 8;
}

// A Cloud Dataproc job for running
// [Apache PySpark](https://spark.apache.org/docs/0.9.0/python-programming-guide.html)
// applications on YARN.
message PySparkJob {
  // [Required] The HCFS URI of the main Python file to use as the driver. Must
  // be a .py file.
  string main_python_file_uri = 1;
  // [Optional] The arguments to pass to the driver. Do not include arguments,
  // such as `--conf`, that can be set as job properties, since a collision may
  // occur that causes an incorrect job submission.
  repeated string args = 2;
  // [Optional] HCFS file URIs of Python files to pass to the PySpark
  // framework. Supported file types: .py, .egg, and .zip.
  repeated string python_file_uris = 3;
  // [Optional] HCFS URIs of jar files to add to the CLASSPATHs of the
  // Python driver and tasks.
  repeated string jar_file_uris = 4;
  // [Optional] HCFS URIs of files to be copied to the working directory of
  // Python drivers and distributed tasks. Useful for naively parallel tasks.
  repeated string file_uris = 5;
  // [Optional] HCFS URIs of archives to be extracted in the working directory of
  // Python drivers and distributed tasks. Supported file types:
  // .jar, .tar, .tar.gz, .tgz, and .zip.
  repeated string archive_uris = 6;
  // [Optional] A mapping of property names to values, used to configure PySpark.
  // Properties that conflict with values set by the Cloud Dataproc API may be
  // overwritten. Can include properties set in
  // /etc/spark/conf/spark-defaults.conf and classes in user code.
  map<string, string> properties = 7;
  // [Optional] The runtime log config for job execution.
  LoggingConfig logging_config = 8;
}

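// Example (illustrative): a pysparkJob section for a hypothetical driver
// script, with an extra helper module and one Spark property:
//
//     "pysparkJob": {
//       "mainPythonFileUri": "gs://my-bucket/analyze.py",
//       "pythonFileUris": ["gs://my-bucket/helpers.py"],
//       "args": ["--date", "2016-01-01"],
//       "properties": { "spark.executor.memory": "2g" }
//     }
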
// A list of queries to run on a cluster.
message QueryList {
  // [Required] The queries to execute. You do not need to terminate a query
  // with a semicolon. Multiple queries can be specified in one string
  // by separating each with a semicolon. Here is an example of a Cloud
  // Dataproc API snippet that uses a QueryList to specify a HiveJob:
  //
  //     "hiveJob": {
  //       "queryList": {
  //         "queries": [
  //           "query1",
  //           "query2",
  //           "query3;query4",
  //         ]
  //       }
  //     }
  repeated string queries = 1;
}

// A Cloud Dataproc job for running [Apache Hive](https://hive.apache.org/)
// queries on YARN.
message HiveJob {
  // [Required] The sequence of Hive queries to execute, specified as either
  // an HCFS file URI or a list of queries.
  oneof queries {
    // The HCFS URI of the script that contains Hive queries.
    string query_file_uri = 1;
    // A list of queries.
    QueryList query_list = 2;
  }
  // [Optional] Whether to continue executing queries if a query fails.
  // The default value is `false`. Setting to `true` can be useful when executing
  // independent parallel queries.
  bool continue_on_failure = 3;
  // [Optional] Mapping of query variable names to values (equivalent to the
  // Hive command: `SET name="value";`).
  map<string, string> script_variables = 4;
  // [Optional] A mapping of property names and values, used to configure Hive.
  // Properties that conflict with values set by the Cloud Dataproc API may be
  // overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml,
  // /etc/hive/conf/hive-site.xml, and classes in user code.
  map<string, string> properties = 5;
  // [Optional] HCFS URIs of jar files to add to the CLASSPATH of the
  // Hive server and Hadoop MapReduce (MR) tasks. Can contain Hive SerDes
  // and UDFs.
  repeated string jar_file_uris = 6;
}

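// Example (illustrative): a hiveJob that runs a script from a hypothetical
// bucket and supplies one query variable (applied as `SET name="value";`,
// per `script_variables` above):
//
//     "hiveJob": {
//       "queryFileUri": "gs://my-bucket/queries/report.q",
//       "scriptVariables": { "run_date": "2016-01-01" },
//       "continueOnFailure": false
//     }
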
// A Cloud Dataproc job for running [Apache Spark SQL](http://spark.apache.org/sql/)
// queries.
message SparkSqlJob {
  // [Required] The sequence of Spark SQL queries to execute, specified as
  // either an HCFS file URI or as a list of queries.
  oneof queries {
    // The HCFS URI of the script that contains SQL queries.
    string query_file_uri = 1;
    // A list of queries.
    QueryList query_list = 2;
  }
  // [Optional] Mapping of query variable names to values (equivalent to the
  // Spark SQL command: `SET name="value";`).
  map<string, string> script_variables = 3;
  // [Optional] A mapping of property names to values, used to configure
  // Spark SQL's SparkConf. Properties that conflict with values set by the
  // Cloud Dataproc API may be overwritten.
  map<string, string> properties = 4;
  // [Optional] HCFS URIs of jar files to be added to the Spark CLASSPATH.
  repeated string jar_file_uris = 56;
  // [Optional] The runtime log config for job execution.
  LoggingConfig logging_config = 6;
}

// A Cloud Dataproc job for running [Apache Pig](https://pig.apache.org/)
// queries on YARN.
message PigJob {
  // [Required] The sequence of Pig queries to execute, specified as an HCFS
  // file URI or a list of queries.
  oneof queries {
    // The HCFS URI of the script that contains the Pig queries.
    string query_file_uri = 1;
    // A list of queries.
    QueryList query_list = 2;
  }
  // [Optional] Whether to continue executing queries if a query fails.
  // The default value is `false`. Setting to `true` can be useful when executing
  // independent parallel queries.
  bool continue_on_failure = 3;
  // [Optional] Mapping of query variable names to values (equivalent to the Pig
  // command: `name=[value]`).
  map<string, string> script_variables = 4;
  // [Optional] A mapping of property names to values, used to configure Pig.
  // Properties that conflict with values set by the Cloud Dataproc API may be
  // overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml,
  // /etc/pig/conf/pig.properties, and classes in user code.
  map<string, string> properties = 5;
  // [Optional] HCFS URIs of jar files to add to the CLASSPATH of
  // the Pig Client and Hadoop MapReduce (MR) tasks. Can contain Pig UDFs.
  repeated string jar_file_uris = 6;
  // [Optional] The runtime log config for job execution.
  LoggingConfig logging_config = 7;
}

// Cloud Dataproc job config.
message JobPlacement {
  // [Required] The name of the cluster where the job will be submitted.
  string cluster_name = 1;
  // [Output-only] A cluster UUID generated by the Cloud Dataproc service when
  // the job is submitted.
  string cluster_uuid = 2;
}

// Cloud Dataproc job status.
message JobStatus {
  // The job state.
  enum State {
    // The job state is unknown.
    STATE_UNSPECIFIED = 0;
    // The job is pending; it has been submitted, but is not yet running.
    PENDING = 1;
    // Job has been received by the service and completed initial setup;
    // it will soon be submitted to the cluster.
    SETUP_DONE = 8;
    // The job is running on the cluster.
    RUNNING = 2;
    // A CancelJob request has been received, but is pending.
    CANCEL_PENDING = 3;
    // Transient in-flight resources have been canceled, and the request to
    // cancel the running job has been issued to the cluster.
    CANCEL_STARTED = 7;
    // The job cancellation was successful.
    CANCELLED = 4;
    // The job has completed successfully.
    DONE = 5;
    // The job has completed, but encountered an error.
    ERROR = 6;
  }
  // [Output-only] A state message specifying the overall job state.
  State state = 1;
  // [Output-only] Optional job state details, such as an error
  // description if the state is <code>ERROR</code>.
  string details = 2;
  // [Output-only] The time when this state was entered.
  google.protobuf.Timestamp state_start_time = 6;
}

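// Note (inferred from the State comments above, not an exhaustive transition
// table): a successful job typically progresses
// PENDING -> SETUP_DONE -> RUNNING -> DONE, a cancelled one passes through
// CANCEL_PENDING -> CANCEL_STARTED -> CANCELLED, and ERROR is the terminal
// state for jobs that completed with a failure.
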
// Encapsulates the full scoping used to reference a job.
message JobReference {
  // [Required] The ID of the Google Cloud Platform project that the job
  // belongs to.
  string project_id = 1;
  // [Optional] The job ID, which must be unique within the project. The job ID
  // is generated by the server upon job submission or provided by the user as a
  // means to perform retries without creating duplicate jobs. The ID must
  // contain only letters (a-z, A-Z), numbers (0-9), underscores (_), or
  // hyphens (-). The maximum length is 512 characters.
  string job_id = 2;
}

// A Cloud Dataproc job resource.
message Job {
  // [Optional] The fully qualified reference to the job, which can be used to
  // obtain the equivalent REST path of the job resource. If this property
  // is not specified when a job is created, the server generates a
  // <code>job_id</code>.
  JobReference reference = 1;
  // [Required] Job information, including how, when, and where to
  // run the job.
  JobPlacement placement = 2;
  // [Required] The application/framework-specific portion of the job.
  oneof type_job {
    // Job is a Hadoop job.
    HadoopJob hadoop_job = 3;
    // Job is a Spark job.
    SparkJob spark_job = 4;
    // Job is a Pyspark job.
    PySparkJob pyspark_job = 5;
    // Job is a Hive job.
    HiveJob hive_job = 6;
    // Job is a Pig job.
    PigJob pig_job = 7;
    // Job is a SparkSql job.
    SparkSqlJob spark_sql_job = 12;
  }
  // [Output-only] The job status. Additional application-specific
  // status information may be contained in the <code>type_job</code>
  // and <code>yarn_applications</code> fields.
  JobStatus status = 8;
  // [Output-only] The previous job status.
  repeated JobStatus status_history = 13;
  // [Output-only] A URI pointing to the location of the stdout of the job's
  // driver program.
  string driver_output_resource_uri = 17;
  // [Output-only] If present, the location of miscellaneous control files
  // which may be used as part of job setup and handling. If not present,
  // control files may be placed in the same location as `driver_output_resource_uri`.
  string driver_control_files_uri = 15;
}

// A request to submit a job.
message SubmitJobRequest {
  // [Required] The ID of the Google Cloud Platform project that the job
  // belongs to.
  string project_id = 1;
  // [Required] The Cloud Dataproc region in which to handle the request.
  string region = 3;
  // [Required] The job resource.
  Job job = 2;
}

// A request to get the resource representation for a job in a project.
message GetJobRequest {
  // [Required] The ID of the Google Cloud Platform project that the job
  // belongs to.
  string project_id = 1;
  // [Required] The Cloud Dataproc region in which to handle the request.
  string region = 3;
  // [Required] The job ID.
  string job_id = 2;
}

// A request to list jobs in a project.
message ListJobsRequest {
  // A matcher that specifies categories of job states.
  enum JobStateMatcher {
    // Match all jobs, regardless of state.
    ALL = 0;
    // Only match jobs in non-terminal states: PENDING, RUNNING, or
    // CANCEL_PENDING.
    ACTIVE = 1;
    // Only match jobs in terminal states: CANCELLED, DONE, or ERROR.
    NON_ACTIVE = 2;
  }
  // [Required] The ID of the Google Cloud Platform project that the job
  // belongs to.
  string project_id = 1;
  // [Required] The Cloud Dataproc region in which to handle the request.
  string region = 6;
  // [Optional] The number of results to return in each response.
  int32 page_size = 2;
  // [Optional] The page token, returned by a previous call, to request the
  // next page of results.
  string page_token = 3;
  // [Optional] If set, the returned jobs list includes only jobs that were
  // submitted to the named cluster.
  string cluster_name = 4;
  // [Optional] Specifies enumerated categories of jobs to list
  // (default = match ALL jobs).
  JobStateMatcher job_state_matcher = 5;
}

// A list of jobs in a project.
message ListJobsResponse {
  // [Output-only] Jobs list.
  repeated Job jobs = 1;
  // [Optional] This token is included in the response if there are more results
  // to fetch. To fetch additional results, provide this value as the
  // `page_token` in a subsequent <code>ListJobsRequest</code>.
  string next_page_token = 2;
}

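// Example (illustrative): to page through results, echo `next_page_token` back
// as `page_token` on the following request. Assuming the standard JSON/HTTP
// mapping and a hypothetical project, that exchange might look like:
//
//     GET /v1/projects/my-project/regions/global/jobs?pageSize=50
//     -> { "jobs": [ ... ], "nextPageToken": "<token>" }
//     GET /v1/projects/my-project/regions/global/jobs?pageSize=50&pageToken=<token>
//
// When `nextPageToken` is absent, there are no more results to fetch.
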
// A request to cancel a job.
message CancelJobRequest {
  // [Required] The ID of the Google Cloud Platform project that the job
  // belongs to.
  string project_id = 1;
  // [Required] The Cloud Dataproc region in which to handle the request.
  string region = 3;
  // [Required] The job ID.
  string job_id = 2;
}

// A request to delete a job.
message DeleteJobRequest {
  // [Required] The ID of the Google Cloud Platform project that the job
  // belongs to.
  string project_id = 1;
  // [Required] The Cloud Dataproc region in which to handle the request.
  string region = 3;
  // [Required] The job ID.
  string job_id = 2;
}