// data.proto (22 KB)
  // Copyright 2016 Google Inc.
  //
  // Licensed under the Apache License, Version 2.0 (the "License");
  // you may not use this file except in compliance with the License.
  // You may obtain a copy of the License at
  //
  // http://www.apache.org/licenses/LICENSE-2.0
  //
  // Unless required by applicable law or agreed to in writing, software
  // distributed under the License is distributed on an "AS IS" BASIS,
  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  // See the License for the specific language governing permissions and
  // limitations under the License.
  // Schema dialect and package shared by every message in this file.
  syntax = "proto3";

  package google.bigtable.v2;

  // Java code-generation settings: separate files per top-level type, the
  // name of the outer wrapper class, and the Java package.
  option java_multiple_files = true;
  option java_outer_classname = "DataProto";
  option java_package = "com.google.bigtable.v2";
  // The complete (requested) contents of one row of a table.
  // A row larger than 256MiB cannot be read in full.
  message Row {
    // Key that uniquely identifies this row within its table; it is the same
    // key used to address the row elsewhere, for example in a
    // MutateRowRequest.
    // Any non-empty byte string of at most 4KiB is allowed.
    bytes key = 1;

    // Column families of the row, in no specified mutual order.
    // May be empty, but only if the entire row is empty.
    repeated Family families = 2;
  }
  // (Some of) the contents of one row/column-family intersection of a table.
  message Family {
    // Key that uniquely identifies this family within its row; it is the same
    // key used to address the family elsewhere, for example by a RowFilter
    // that sets its `family_name_regex_filter` field.
    // Must match `[-_.a-zA-Z0-9]+` and be no longer than 64 characters. The
    // one exception is that AggregatingRowProcessors may produce cells in a
    // sentinel family whose name is empty.
    string name = 1;

    // Columns of this family, sorted by increasing `qualifier`.
    // Must not be empty.
    repeated Column columns = 2;
  }
  // (Some of) the contents of one row/column intersection of a table.
  message Column {
    // Key that uniquely identifies this column within its family; it is the
    // same key used to address the column elsewhere, for example by a
    // RowFilter that sets its `column_qualifier_regex_filter` field.
    // Any byte string of at most 16KiB is allowed, the empty string included.
    bytes qualifier = 1;

    // Cells of this column, sorted by decreasing `timestamp_micros`.
    // Must not be empty.
    repeated Cell cells = 2;
  }
  // (Some of) the contents of one row/column/timestamp of a table.
  message Cell {
    // Stored timestamp of the cell; it also identifies the cell uniquely
    // within its column.
    // The unit is always microseconds, although an individual table may set a
    // coarser granularity that further restricts the allowed values. A table
    // with millisecond granularity, for example, only allows
    // `timestamp_micros` values that are multiples of 1000.
    int64 timestamp_micros = 1;

    // Value stored in the cell.
    // Any byte string of at most 100MiB is allowed, the empty string included.
    bytes value = 2;

    // Labels that a [RowFilter][google.bigtable.v2.RowFilter] applied to the
    // cell.
    repeated string labels = 3;
  }
  // Specifies a contiguous range of rows.
  message RowRange {
    // The row key at which to start the range.
    // If neither field is set, interpreted as the empty string, inclusive.
    oneof start_key {
      // Used when giving an inclusive lower bound for the range.
      bytes start_key_closed = 1;

      // Used when giving an exclusive lower bound for the range.
      bytes start_key_open = 2;
    }

    // The row key at which to end the range.
    // If neither field is set, interpreted as the infinite row key, exclusive.
    oneof end_key {
      // Used when giving an exclusive upper bound for the range.
      bytes end_key_open = 3;

      // Used when giving an inclusive upper bound for the range.
      bytes end_key_closed = 4;
    }
  }
  // A set of rows that need not be contiguous.
  message RowSet {
    // Individual row keys that belong to the set.
    repeated bytes row_keys = 1;

    // Contiguous ranges of rows that belong to the set.
    repeated RowRange row_ranges = 2;
  }
  // Specifies a contiguous range of columns within a single column family.
  // The range spans from <family_name>:<start_qualifier> to
  // <family_name>:<end_qualifier>, where both bounds can be either
  // inclusive or exclusive.
  message ColumnRange {
    // The name of the column family within which this range falls.
    string family_name = 1;

    // The column qualifier at which to start the range (within `family_name`).
    // If neither field is set, interpreted as the empty string, inclusive.
    oneof start_qualifier {
      // Used when giving an inclusive lower bound for the range.
      bytes start_qualifier_closed = 2;

      // Used when giving an exclusive lower bound for the range.
      bytes start_qualifier_open = 3;
    }

    // The column qualifier at which to end the range (within `family_name`).
    // If neither field is set, interpreted as the infinite string, exclusive.
    oneof end_qualifier {
      // Used when giving an inclusive upper bound for the range.
      bytes end_qualifier_closed = 4;

      // Used when giving an exclusive upper bound for the range.
      bytes end_qualifier_open = 5;
    }
  }
  // A contiguous range of timestamps, expressed in microseconds.
  message TimestampRange {
    // Lower bound, inclusive. Interpreted as 0 if left empty.
    int64 start_timestamp_micros = 1;

    // Upper bound, exclusive. Interpreted as infinity if left empty.
    int64 end_timestamp_micros = 2;
  }
  // A contiguous range of raw byte values.
  message ValueRange {
    // Where the range starts.
    // With neither field set, the start is the empty string, inclusive.
    oneof start_value {
      // Lower bound of the range, inclusive.
      bytes start_value_closed = 1;

      // Lower bound of the range, exclusive.
      bytes start_value_open = 2;
    }

    // Where the range ends.
    // With neither field set, the end is the infinite string, exclusive.
    oneof end_value {
      // Upper bound of the range, inclusive.
      bytes end_value_closed = 3;

      // Upper bound of the range, exclusive.
      bytes end_value_open = 4;
    }
  }
  // Takes a row as input and produces an alternate view of the row based on
  // specified rules. For example, a RowFilter might trim down a row to include
  // just the cells from columns matching a given regular expression, or might
  // return all the cells of a row but not their values. More complicated
  // filters can be composed out of these components to express requests such
  // as, "within every column of a particular family, give just the two most
  // recent cells which are older than timestamp X."
  //
  // There are two broad categories of RowFilters (true filters and
  // transformers), as well as two ways to compose simple filters into more
  // complex ones (chains and interleaves). They work as follows:
  //
  // * True filters alter the input row by excluding some of its cells
  //   wholesale from the output row. An example of a true filter is the
  //   `value_regex_filter`, which excludes cells whose values don't match the
  //   specified pattern. All regex true filters use RE2 syntax
  //   (https://github.com/google/re2/wiki/Syntax) in raw byte mode
  //   (RE2::Latin1), and are evaluated as full matches. An important point to
  //   keep in mind is that `RE2(.)` is equivalent by default to `RE2([^\n])`,
  //   meaning that it does not match newlines. When attempting to match an
  //   arbitrary byte, you should therefore use the escape sequence `\C`, which
  //   may need to be further escaped as `\\C` in your client language.
  //
  // * Transformers alter the input row by changing the values of some of its
  //   cells in the output, without excluding them completely. Currently, the
  //   only supported transformer is the `strip_value_transformer`, which
  //   replaces every cell's value with the empty string.
  //
  // * Chains and interleaves are described in more detail in the
  //   RowFilter.Chain and RowFilter.Interleave documentation.
  //
  // The total serialized size of a RowFilter message must not
  // exceed 4096 bytes, and RowFilters may not be nested within each other
  // (in Chains or Interleaves) to a depth of more than 20.
  message RowFilter {
    // A RowFilter which sends rows through several RowFilters in sequence.
    message Chain {
      // The elements of `filters` are chained together to process the input
      // row:
      // in row -> f(0) -> intermediate row -> f(1) -> ... -> f(N) -> out row
      // The full chain is executed atomically.
      repeated RowFilter filters = 1;
    }

    // A RowFilter which sends each row to each of several component
    // RowFilters and interleaves the results.
    message Interleave {
      // The elements of `filters` all process a copy of the input row, and the
      // results are pooled, sorted, and combined into a single output row.
      // If multiple cells are produced with the same column and timestamp,
      // they will all appear in the output row in an unspecified mutual order.
      // Consider the following example, with three filters:
      //
      //                             input row
      //                                 |
      //           ---------------------------------------------
      //           |                     |                     |
      //          f(0)                  f(1)                  f(2)
      //           |                     |                     |
      //    1: foo,bar,10,x        foo,bar,10,z           far,bar,7,a
      //    2: foo,blah,11,z       far,blah,5,x           far,blah,5,x
      //           |                     |                     |
      //           ---------------------------------------------
      //                                 |
      //    1:                     foo,bar,10,z   // could have switched with #2
      //    2:                     foo,bar,10,x   // could have switched with #1
      //    3:                     foo,blah,11,z
      //    4:                     far,bar,7,a
      //    5:                     far,blah,5,x   // identical to #6
      //    6:                     far,blah,5,x   // identical to #5
      //
      // All interleaved filters are executed atomically.
      repeated RowFilter filters = 1;
    }

    // A RowFilter which evaluates one of two possible RowFilters, depending on
    // whether or not a predicate RowFilter outputs any cells from the input
    // row.
    //
    // IMPORTANT NOTE: The predicate filter does not execute atomically with
    // the true and false filters, which may lead to inconsistent or unexpected
    // results. Additionally, Condition filters have poor performance,
    // especially when filters are set for the false condition.
    message Condition {
      // If `predicate_filter` outputs any cells, then `true_filter` will be
      // evaluated on the input row. Otherwise, `false_filter` will be
      // evaluated.
      RowFilter predicate_filter = 1;

      // The filter to apply to the input row if `predicate_filter` returns any
      // results. If not provided, no results will be returned in the true
      // case.
      RowFilter true_filter = 2;

      // The filter to apply to the input row if `predicate_filter` does not
      // return any results. If not provided, no results will be returned in
      // the false case.
      RowFilter false_filter = 3;
    }

    // Which of the possible RowFilter types to apply. If none are set, this
    // RowFilter returns all cells in the input row.
    oneof filter {
      // Applies several RowFilters to the data in sequence, progressively
      // narrowing the results.
      Chain chain = 1;

      // Applies several RowFilters to the data in parallel and combines the
      // results.
      Interleave interleave = 2;

      // Applies one of two possible RowFilters to the data based on the output
      // of a predicate RowFilter.
      Condition condition = 3;

      // ADVANCED USE ONLY.
      // Hook for introspection into the RowFilter. Outputs all cells directly
      // to the output of the read rather than to any parent filter. Consider
      // the following example:
      //
      //     Chain(
      //       FamilyRegex("A"),
      //       Interleave(
      //         All(),
      //         Chain(Label("foo"), Sink())
      //       ),
      //       QualifierRegex("B")
      //     )
      //
      //                   A,A,1,w
      //                   A,B,2,x
      //                   B,B,4,z
      //                      |
      //               FamilyRegex("A")
      //                      |
      //                   A,A,1,w
      //                   A,B,2,x
      //                      |
      //         +------------+-------------+
      //         |                          |
      //       All()                    Label(foo)
      //         |                          |
      //      A,A,1,w              A,A,1,w,labels:[foo]
      //      A,B,2,x              A,B,2,x,labels:[foo]
      //         |                          |
      //         |                        Sink() --------------+
      //         |                          |                  |
      //         +------------+      x------+        A,A,1,w,labels:[foo]
      //                      |                      A,B,2,x,labels:[foo]
      //                   A,A,1,w                             |
      //                   A,B,2,x                             |
      //                      |                                |
      //             QualifierRegex("B")                       |
      //                      |                                |
      //                   A,B,2,x                             |
      //                      |                                |
      //                      +--------------------------------+
      //                      |
      //                   A,A,1,w,labels:[foo]
      //                   A,B,2,x,labels:[foo]  // could be switched
      //                   A,B,2,x               // could be switched
      //
      // Despite being excluded by the qualifier filter, a copy of every cell
      // that reaches the sink is present in the final result.
      //
      // As with an [Interleave][google.bigtable.v2.RowFilter.Interleave],
      // duplicate cells are possible, and appear in an unspecified mutual
      // order. In this case we have a duplicate with column "A:B" and
      // timestamp 2, because one copy passed through the all filter while the
      // other was passed through the label and sink. Note that one copy has
      // label "foo", while the other does not.
      //
      // Cannot be used within the `predicate_filter`, `true_filter`, or
      // `false_filter` of a
      // [Condition][google.bigtable.v2.RowFilter.Condition].
      bool sink = 16;

      // Matches all cells, regardless of input. Functionally equivalent to
      // leaving `filter` unset, but included for completeness.
      bool pass_all_filter = 17;

      // Does not match any cells, regardless of input. Useful for temporarily
      // disabling just part of a filter.
      bool block_all_filter = 18;

      // Matches only cells from rows whose keys satisfy the given RE2 regex.
      // In other words, passes through the entire row when the key matches,
      // and otherwise produces an empty row.
      // Note that, since row keys can contain arbitrary bytes, the `\C` escape
      // sequence must be used if a true wildcard is desired. The `.` character
      // will not match the new line character `\n`, which may be present in a
      // binary key.
      bytes row_key_regex_filter = 4;

      // Matches all cells from a row with probability p, and matches no cells
      // from the row with probability 1-p.
      double row_sample_filter = 14;

      // Matches only cells from columns whose families satisfy the given RE2
      // regex. For technical reasons, the regex must not contain the `:`
      // character, even if it is not being used as a literal.
      // Note that, since column families cannot contain the new line character
      // `\n`, it is sufficient to use `.` as a full wildcard when matching
      // column family names.
      string family_name_regex_filter = 5;

      // Matches only cells from columns whose qualifiers satisfy the given RE2
      // regex.
      // Note that, since column qualifiers can contain arbitrary bytes, the
      // `\C` escape sequence must be used if a true wildcard is desired. The
      // `.` character will not match the new line character `\n`, which may be
      // present in a binary qualifier.
      bytes column_qualifier_regex_filter = 6;

      // Matches only cells from columns within the given range.
      ColumnRange column_range_filter = 7;

      // Matches only cells with timestamps within the given range.
      TimestampRange timestamp_range_filter = 8;

      // Matches only cells with values that satisfy the given regular
      // expression.
      // Note that, since cell values can contain arbitrary bytes, the `\C`
      // escape sequence must be used if a true wildcard is desired. The `.`
      // character will not match the new line character `\n`, which may be
      // present in a binary value.
      bytes value_regex_filter = 9;

      // Matches only cells with values that fall within the given range.
      ValueRange value_range_filter = 15;

      // Skips the first N cells of each row, matching all subsequent cells.
      // If duplicate cells are present, as is possible when using an
      // Interleave, each copy of the cell is counted separately.
      int32 cells_per_row_offset_filter = 10;

      // Matches only the first N cells of each row.
      // If duplicate cells are present, as is possible when using an
      // Interleave, each copy of the cell is counted separately.
      int32 cells_per_row_limit_filter = 11;

      // Matches only the most recent N cells within each column. For example,
      // if N=2, this filter would match column `foo:bar` at timestamps 10 and
      // 9, skip all earlier cells in `foo:bar`, and then begin matching again
      // in column `foo:bar2`.
      // If duplicate cells are present, as is possible when using an
      // Interleave, each copy of the cell is counted separately.
      int32 cells_per_column_limit_filter = 12;

      // Replaces each cell's value with the empty string.
      bool strip_value_transformer = 13;

      // Applies the given label to all cells in the output row. This allows
      // the client to determine which results were produced from which part
      // of the filter.
      //
      // Values must be at most 15 characters in length, and match the RE2
      // pattern `[a-z0-9\\-]+`
      //
      // Due to a technical limitation, it is not currently possible to apply
      // multiple labels to a cell. As a result, a Chain may have no more than
      // one sub-filter which contains an `apply_label_transformer`. It is okay
      // for an Interleave to contain multiple `apply_label_transformer`
      // filters, as they will be applied to separate copies of the input.
      // This may be relaxed in the future.
      string apply_label_transformer = 19;
    }
  }
  // One particular change to apply to the contents of a row.
  message Mutation {
    // Mutation that sets the value of a given cell.
    message SetCell {
      // Family into which the new data is written.
      // Must match `[-_.a-zA-Z0-9]+`
      string family_name = 1;

      // Qualifier of the column into which the new data is written.
      // Any byte string is allowed, the empty string included.
      bytes column_qualifier = 2;

      // Timestamp of the cell into which the new data is written.
      // Pass -1 to use the current Bigtable server time. Otherwise the client
      // sets the value itself; note that an unspecified field defaults to a
      // timestamp of zero.
      // Values must match the granularity of the table (e.g. micros, millis).
      int64 timestamp_micros = 3;

      // Value to write into the given cell.
      bytes value = 4;
    }

    // Mutation that deletes cells from a given column, optionally limited to
    // a timestamp range.
    message DeleteFromColumn {
      // Family from which cells are deleted.
      // Must match `[-_.a-zA-Z0-9]+`
      string family_name = 1;

      // Qualifier of the column from which cells are deleted.
      // Any byte string is allowed, the empty string included.
      bytes column_qualifier = 2;

      // Range of timestamps within which cells are deleted.
      TimestampRange time_range = 3;
    }

    // Mutation that deletes every cell of a given column family.
    message DeleteFromFamily {
      // Family from which cells are deleted.
      // Must match `[-_.a-zA-Z0-9]+`
      string family_name = 1;
    }

    // Mutation that deletes every cell of the containing row.
    message DeleteFromRow {
    }

    // The kind of mutation to apply.
    oneof mutation {
      // Sets the value of a cell.
      SetCell set_cell = 1;

      // Deletes cells from a column.
      DeleteFromColumn delete_from_column = 2;

      // Deletes cells from a column family.
      DeleteFromFamily delete_from_family = 3;

      // Deletes cells from the entire row.
      DeleteFromRow delete_from_row = 4;
    }
  }
  // An atomic read/modify/write operation on the latest value of a given
  // column.
  message ReadModifyWriteRule {
    // Family to which the read/modify/write is applied.
    // Must match `[-_.a-zA-Z0-9]+`
    string family_name = 1;

    // Qualifier of the column to which the read/modify/write is applied.
    // Any byte string is allowed, the empty string included.
    bytes column_qualifier = 2;

    // How the column's new latest value is derived from its current latest
    // value.
    oneof rule {
      // Appends `append_value` to the existing value.
      // An unset target cell is treated as containing the empty string.
      bytes append_value = 3;

      // Adds `increment_amount` to the existing value.
      // An unset target cell is treated as containing a zero. Otherwise the
      // target cell must hold an 8-byte value (interpreted as a 64-bit
      // big-endian signed integer), or the entire request will fail.
      int64 increment_amount = 4;
    }
  }