// job_service.proto
  1. // Copyright 2016 Google Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. syntax = "proto3";
  15. package google.cloud.ml.v1beta1;
  16. import "google/api/annotations.proto";
  17. import "google/protobuf/empty.proto";
  18. import "google/protobuf/timestamp.proto";
  19. option java_multiple_files = true;
  20. option java_outer_classname = "JobServiceProto";
  21. option java_package = "com.google.cloud.ml.api.v1beta1";
  22. // Copyright 2016 Google Inc. All Rights Reserved.
  23. //
  24. // Proto file for the Machine Learning Service
  25. // Describes the 'job service' to manage training and prediction jobs.
  // Service for creating and managing training and prediction jobs.
  service JobService {
    // Creates a training or a prediction job.
    rpc CreateJob(CreateJobRequest) returns (Job) {
      option (google.api.http) = {
        post: "/v1beta1/{parent=projects/*}/jobs"
        body: "job"
      };
    }

    // Lists the jobs in the project.
    rpc ListJobs(ListJobsRequest) returns (ListJobsResponse) {
      option (google.api.http) = {
        get: "/v1beta1/{parent=projects/*}/jobs"
      };
    }

    // Describes a job.
    rpc GetJob(GetJobRequest) returns (Job) {
      option (google.api.http) = {
        get: "/v1beta1/{name=projects/*/jobs/*}"
      };
    }

    // Cancels a running job.
    rpc CancelJob(CancelJobRequest) returns (google.protobuf.Empty) {
      option (google.api.http) = {
        post: "/v1beta1/{name=projects/*/jobs/*}:cancel"
        body: "*"
      };
    }
  }
  // Input parameters of a training job.
  message TrainingInput {
    // Available scale tiers.
    enum ScaleTier {
      // One worker instance and no parameter servers.
      BASIC = 0;

      // A few workers and a single parameter server.
      STANDARD_1 = 1;

      // A medium number of workers and a few parameter servers.
      STANDARD_2 = 2;

      // A large number of workers with more parameter servers.
      PREMIUM_1 = 3;

      // A very large number of workers with even more parameter servers.
      PREMIUM_2 = 4;

      // The replica counts are given explicitly in the `worker_count` and
      // `parameter_server_count` fields, along with the machine types for
      // the master, the workers and the parameter servers.
      CUSTOM = 5;
    }

    // Required. Determines the machine types and the number of replicas for
    // workers and parameter servers.
    ScaleTier scale_tier = 1;

    // Optional. Machine type of the master.
    // Supported types:
    //
    // - `standard`
    // - `large_model`
    // - `complex_model_s`
    // - `complex_model_m`
    // - `complex_model_l`
    //
    // Cannot be used in combination with a standard scale tier.
    string master_type = 2;

    // Optional. Machine type of the workers.
    // Supported types:
    //
    // - `standard`
    // - `large_model`
    // - `complex_model_s`
    // - `complex_model_m`
    // - `complex_model_l`
    //
    // Cannot be used in combination with a standard scale tier.
    string worker_type = 3;

    // Optional. Machine type of the parameter servers.
    // Supported types:
    //
    // - `standard`
    // - `large_model`
    // - `complex_model_s`
    // - `complex_model_m`
    // - `complex_model_l`
    //
    // Cannot be used in combination with a standard scale tier.
    string parameter_server_type = 4;

    // Optional. The required number of worker replicas.
    // Cannot be used in combination with a standard scale tier.
    int64 worker_count = 5;

    // Optional. The required number of parameter server replicas.
    // Cannot be used in combination with a standard scale tier.
    int64 parameter_server_count = 6;

    // Required. Google Cloud Storage location of the packages containing the
    // training program and any additional dependencies.
    repeated string package_uris = 7;

    // Required. Name of the Python module to run once the packages have been
    // installed.
    string python_module = 8;

    // Optional. Command line arguments passed to the program.
    repeated string args = 10;

    // Optional. The set of hyperparameters to tune.
    HyperparameterSpec hyperparameters = 12;

    // Required. The Google Compute Engine region in which the training job
    // runs.
    string region = 14;
  }
  // A set of hyperparameters to optimize.
  message HyperparameterSpec {
    // Optimization goal for the objective value.
    enum GoalType {
      // Unspecified; the goal type defaults to maximize.
      GOAL_TYPE_UNSPECIFIED = 0;

      // The goal metric is maximized.
      MAXIMIZE = 1;

      // The goal metric is minimized.
      MINIMIZE = 2;
    }

    // Required. Whether the evaluation metric should be maximized or
    // minimized.
    GoalType goal = 1;

    // Required. The parameters to tune.
    repeated ParameterSpec params = 2;

    // Optional. Number of training trials to attempt during optimization.
    // Defaults to one.
    int32 max_trials = 3;

    // Optional. Number of training trials to run in parallel.
    // More parallelization will be faster, but parallel trials only benefit
    // from the information gained by previous trials.
    // Every trial uses the same scale tier and machine types.
    // Defaults to one.
    int32 max_parallel_trials = 4;
  }
  // A single hyperparameter to optimize.
  message ParameterSpec {
    // Kind of value the parameter takes.
    enum ParameterType {
      // The parameter type must be specified; an unspecified value is
      // treated as an error.
      PARAMETER_TYPE_UNSPECIFIED = 0;

      // Real-valued parameter.
      DOUBLE = 1;

      // Integral parameter.
      INTEGER = 2;

      // Categorical parameter whose value is chosen from the
      // `categorical_values` field.
      CATEGORICAL = 3;

      // Real-valued parameter with a fixed set of feasible points. When
      // `type` is `DISCRETE`, `discrete_values` must be provided, and
      // `min_value` and `max_value` are ignored.
      DISCRETE = 4;
    }

    // Scaling applied to the parameter.
    enum ScaleType {
      // No scaling is applied. This is the default.
      NONE = 0;

      // The feasible space is scaled linearly to (0, 1).
      UNIT_LINEAR_SCALE = 1;

      // The feasible space is scaled logarithmically to (0, 1). The entire
      // feasible space must be strictly positive.
      UNIT_LOG_SCALE = 2;

      // The feasible space is scaled "reverse" logarithmically to (0, 1).
      // As a result, values close to the top of the feasible space are
      // spread out more than points near the bottom. The entire feasible
      // space must be strictly positive.
      UNIT_REVERSE_LOG_SCALE = 3;
    }

    // Required. Name of the parameter, e.g. "learning_rate". It must be
    // unique amongst all ParameterSpec entries in a HyperparameterSpec
    // message.
    string parameter_name = 1;

    // Required. The type of the parameter.
    ParameterType type = 4;

    // Required if `type` is `DOUBLE` or `INTEGER`. Should be left unset if
    // `type` is `CATEGORICAL`. The value should be an integer if `type` is
    // `INTEGER`.
    double min_value = 2;

    // Required if `type` is `DOUBLE` or `INTEGER`. Should be left unset if
    // `type` is `CATEGORICAL`. The value should be an integer if `type` is
    // `INTEGER`.
    double max_value = 3;

    // Required if `type` is `CATEGORICAL`. The list of possible categories.
    repeated string categorical_values = 5;

    // Required if `type` is `DISCRETE`.
    // The list of feasible points, which should be in strictly increasing
    // order. For instance, this parameter might have possible settings of
    // 1.5, 2.5, and 4.0. The list shouldn't be too large - probably not more
    // than 1,000 points.
    repeated double discrete_values = 6;

    // Optional. How the parameter is scaled to the hypercube.
    // Leave unset for categorical parameters.
    // Some kind of scaling (e.g., `UNIT_LINEAR_SCALE`) is strongly
    // recommended for real or integral parameters.
    ScaleType scale_type = 7;
  }
  // Result of one hyperparameter tuning trial of a training job.
  message HyperparameterOutput {
    // A single observed value of a metric.
    message HyperparameterMetric {
      // Global training step at which the metric was observed.
      int64 training_step = 1;

      // Objective value at this training step.
      double objective_value = 2;
    }

    // Id of the trial these results belong to.
    string trial_id = 1;

    // Hyperparameters that were given to this trial.
    map<string, string> hyperparameters = 2;

    // Final objective metric seen for this trial.
    HyperparameterMetric final_metric = 3;

    // All objective metrics recorded for this trial.
    repeated HyperparameterMetric all_metrics = 4;
  }
  // Results of a training job.
  message TrainingOutput {
    // Number of tuning trials that completed successfully.
    int64 completed_trial_count = 1;

    // Results of the individual hyperparameter trials.
    repeated HyperparameterOutput trials = 2;
  }
  // Input parameters of a prediction job.
  message PredictionInput {
    // Format used to separate data instances in the source files.
    enum DataFormat {
      // The format is unspecified.
      DATA_FORMAT_UNSPECIFIED = 0;

      // Text source file in which instances are separated by the new-line
      // character.
      TEXT = 1;

      // TFRecord source file.
      TF_RECORD = 2;
    }

    // Required. The model or the version to use for prediction.
    oneof model_version {
      // Name of the model. Its default version will be used.
      // E.g "project/your_project/models/your_model"
      string model_name = 1;

      // The version to be used.
      // E.g "project/your_project/models/your_model/versions/your_version"
      string version_name = 2;
    }

    // Required. Format of the input data files.
    DataFormat data_format = 3;

    // Required. Google Cloud Storage location of the input data files.
    // May contain wildcards.
    repeated string input_paths = 4;

    // Required. Google Cloud Storage location for the output.
    string output_path = 5;

    // Optional. Maximum number of workers used for parallel processing.
    // Defaults to 10.
    int64 max_worker_count = 6;

    // Required. The Google Compute Engine region in which the prediction job
    // runs.
    string region = 7;
  }
  // Results of a prediction job.
  message PredictionOutput {
    // Output Google Cloud Storage location that was provided when the job
    // was created.
    string output_path = 1;

    // Number of predictions generated.
    int64 prediction_count = 2;

    // Number of data instances that resulted in errors.
    int64 error_count = 3;
  }
  // A training or prediction job.
  message Job {
    // State of the job.
    enum State {
      // The job state is unspecified.
      STATE_UNSPECIFIED = 0;

      // The job has just been created and is waiting to be processed.
      QUEUED = 1;

      // The job is being prepared to run.
      PREPARING = 2;

      // Training or prediction is in progress.
      RUNNING = 3;

      // The job completed successfully.
      SUCCEEDED = 4;

      // The job failed.
      // `error_message` should contain the details of the failure.
      FAILED = 5;

      // The job is being cancelled.
      // `error_message` should describe the reason for the cancellation.
      CANCELLING = 6;

      // The job has been cancelled.
      // `error_message` should describe the reason for the cancellation.
      CANCELLED = 7;
    }

    // Required. The id of the job, as specified by the user.
    string job_id = 1;

    // Required. Parameters to create a job.
    oneof input {
      // Input parameters for creating a training job.
      TrainingInput training_input = 2;

      // Input parameters for creating a prediction job.
      PredictionInput prediction_input = 3;
    }

    // Output only. Time at which the job was created.
    google.protobuf.Timestamp create_time = 4;

    // Output only. Time at which processing of the job started.
    google.protobuf.Timestamp start_time = 5;

    // Output only. Time at which processing of the job completed.
    google.protobuf.Timestamp end_time = 6;

    // Output only. Detailed state of the job.
    State state = 7;

    // Output only. Details of a failure or a cancellation.
    string error_message = 8;

    // Output only. The current result of the job.
    oneof output {
      // Current result of the training job.
      TrainingOutput training_output = 9;

      // Current result of the prediction job.
      PredictionOutput prediction_output = 10;
    }
  }
  // Request message of the CreateJob method.
  message CreateJobRequest {
    // Required. The project name.
    // Authorization: requires `Editor` role on the specified project.
    string parent = 1;

    // Required. The job that is to be created.
    Job job = 2;
  }
  // Request message of the ListJobs method.
  message ListJobsRequest {
    // Required. Name of the project whose jobs are to be listed.
    // Authorization: requires `Viewer` role on the specified project.
    string parent = 1;

    // Optional. Selects the subset of jobs to retrieve.
    string filter = 2;

    // Optional. Determines the ordering of the jobs.
    string order_by = 3;

    // Optional. Token for continuing the enumeration.
    string page_token = 4;

    // Optional. The page size.
    int32 page_size = 5;
  }
  // Response message of the ListJobs method.
  message ListJobsResponse {
    // The jobs that were listed.
    repeated Job jobs = 1;

    // Optional. Pagination token to use for retrieving the next page of
    // results.
    string next_page_token = 2;
  }
  // Request message of the GetJob method.
  message GetJobRequest {
    // Required. Name of the job.
    // Authorization: requires `Viewer` role on the parent project.
    string name = 1;
  }
  // Request message of the CancelJob method.
  message CancelJobRequest {
    // Required. Name of the job.
    // Authorization: requires `Editor` role on the parent project.
    string name = 1;
  }