// bigtable_data.proto (22 KB)
// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
  14. syntax = "proto3";
  15. package google.bigtable.v1;
  16. option java_multiple_files = true;
  17. option java_outer_classname = "BigtableDataProto";
  18. option java_package = "com.google.bigtable.v1";
  19. // Specifies the complete (requested) contents of a single row of a table.
  20. // Rows which exceed 256MiB in size cannot be read in full.
  21. message Row {
  22. // The unique key which identifies this row within its table. This is the same
  23. // key that's used to identify the row in, for example, a MutateRowRequest.
  24. // May contain any non-empty byte string up to 4KiB in length.
  25. bytes key = 1;
  26. // May be empty, but only if the entire row is empty.
  27. // The mutual ordering of column families is not specified.
  28. repeated Family families = 2;
  29. }
  30. // Specifies (some of) the contents of a single row/column family of a table.
  31. message Family {
  32. // The unique key which identifies this family within its row. This is the
  33. // same key that's used to identify the family in, for example, a RowFilter
  34. // which sets its "family_name_regex_filter" field.
  35. // Must match [-_.a-zA-Z0-9]+, except that AggregatingRowProcessors may
  36. // produce cells in a sentinel family with an empty name.
  37. // Must be no greater than 64 characters in length.
  38. string name = 1;
  39. // Must not be empty. Sorted in order of increasing "qualifier".
  40. repeated Column columns = 2;
  41. }
  42. // Specifies (some of) the contents of a single row/column of a table.
  43. message Column {
  44. // The unique key which identifies this column within its family. This is the
  45. // same key that's used to identify the column in, for example, a RowFilter
  46. // which sets its "column_qualifier_regex_filter" field.
  47. // May contain any byte string, including the empty string, up to 16kiB in
  48. // length.
  49. bytes qualifier = 1;
  50. // Must not be empty. Sorted in order of decreasing "timestamp_micros".
  51. repeated Cell cells = 2;
  52. }
  53. // Specifies (some of) the contents of a single row/column/timestamp of a table.
  54. message Cell {
  55. // The cell's stored timestamp, which also uniquely identifies it within
  56. // its column.
  57. // Values are always expressed in microseconds, but individual tables may set
  58. // a coarser "granularity" to further restrict the allowed values. For
  59. // example, a table which specifies millisecond granularity will only allow
  60. // values of "timestamp_micros" which are multiples of 1000.
  61. int64 timestamp_micros = 1;
  62. // The value stored in the cell.
  63. // May contain any byte string, including the empty string, up to 100MiB in
  64. // length.
  65. bytes value = 2;
  66. // Labels applied to the cell by a [RowFilter][google.bigtable.v1.RowFilter].
  67. repeated string labels = 3;
  68. }
  69. // Specifies a contiguous range of rows.
  70. message RowRange {
  71. // Inclusive lower bound. If left empty, interpreted as the empty string.
  72. bytes start_key = 2;
  73. // Exclusive upper bound. If left empty, interpreted as infinity.
  74. bytes end_key = 3;
  75. }
  76. // Specifies a non-contiguous set of rows.
  77. message RowSet {
  78. // Single rows included in the set.
  79. repeated bytes row_keys = 1;
  80. // Contiguous row ranges included in the set.
  81. repeated RowRange row_ranges = 2;
  82. }
  83. // Specifies a contiguous range of columns within a single column family.
  84. // The range spans from <column_family>:<start_qualifier> to
  85. // <column_family>:<end_qualifier>, where both bounds can be either inclusive or
  86. // exclusive.
  87. message ColumnRange {
  88. // The name of the column family within which this range falls.
  89. string family_name = 1;
  90. // The column qualifier at which to start the range (within 'column_family').
  91. // If neither field is set, interpreted as the empty string, inclusive.
  92. oneof start_qualifier {
  93. // Used when giving an inclusive lower bound for the range.
  94. bytes start_qualifier_inclusive = 2;
  95. // Used when giving an exclusive lower bound for the range.
  96. bytes start_qualifier_exclusive = 3;
  97. }
  98. // The column qualifier at which to end the range (within 'column_family').
  99. // If neither field is set, interpreted as the infinite string, exclusive.
  100. oneof end_qualifier {
  101. // Used when giving an inclusive upper bound for the range.
  102. bytes end_qualifier_inclusive = 4;
  103. // Used when giving an exclusive upper bound for the range.
  104. bytes end_qualifier_exclusive = 5;
  105. }
  106. }
  107. // Specified a contiguous range of microsecond timestamps.
  108. message TimestampRange {
  109. // Inclusive lower bound. If left empty, interpreted as 0.
  110. int64 start_timestamp_micros = 1;
  111. // Exclusive upper bound. If left empty, interpreted as infinity.
  112. int64 end_timestamp_micros = 2;
  113. }
  114. // Specifies a contiguous range of raw byte values.
  115. message ValueRange {
  116. // The value at which to start the range.
  117. // If neither field is set, interpreted as the empty string, inclusive.
  118. oneof start_value {
  119. // Used when giving an inclusive lower bound for the range.
  120. bytes start_value_inclusive = 1;
  121. // Used when giving an exclusive lower bound for the range.
  122. bytes start_value_exclusive = 2;
  123. }
  124. // The value at which to end the range.
  125. // If neither field is set, interpreted as the infinite string, exclusive.
  126. oneof end_value {
  127. // Used when giving an inclusive upper bound for the range.
  128. bytes end_value_inclusive = 3;
  129. // Used when giving an exclusive upper bound for the range.
  130. bytes end_value_exclusive = 4;
  131. }
  132. }
  133. // Takes a row as input and produces an alternate view of the row based on
  134. // specified rules. For example, a RowFilter might trim down a row to include
  135. // just the cells from columns matching a given regular expression, or might
  136. // return all the cells of a row but not their values. More complicated filters
  137. // can be composed out of these components to express requests such as, "within
  138. // every column of a particular family, give just the two most recent cells
  139. // which are older than timestamp X."
  140. //
  141. // There are two broad categories of RowFilters (true filters and transformers),
  142. // as well as two ways to compose simple filters into more complex ones
  143. // (chains and interleaves). They work as follows:
  144. //
  145. // * True filters alter the input row by excluding some of its cells wholesale
  146. // from the output row. An example of a true filter is the "value_regex_filter",
  147. // which excludes cells whose values don't match the specified pattern. All
  148. // regex true filters use RE2 syntax (https://github.com/google/re2/wiki/Syntax)
  149. // in raw byte mode (RE2::Latin1), and are evaluated as full matches. An
  150. // important point to keep in mind is that RE2(.) is equivalent by default to
  151. // RE2([^\n]), meaning that it does not match newlines. When attempting to match
  152. // an arbitrary byte, you should therefore use the escape sequence '\C', which
  153. // may need to be further escaped as '\\C' in your client language.
  154. //
  155. // * Transformers alter the input row by changing the values of some of its
  156. // cells in the output, without excluding them completely. Currently, the only
  157. // supported transformer is the "strip_value_transformer", which replaces every
  158. // cell's value with the empty string.
  159. //
  160. // * Chains and interleaves are described in more detail in the
  161. // RowFilter.Chain and RowFilter.Interleave documentation.
  162. //
  163. // The total serialized size of a RowFilter message must not
  164. // exceed 4096 bytes, and RowFilters may not be nested within each other
  165. // (in Chains or Interleaves) to a depth of more than 20.
  166. message RowFilter {
  167. // A RowFilter which sends rows through several RowFilters in sequence.
  168. message Chain {
  169. // The elements of "filters" are chained together to process the input row:
  170. // in row -> f(0) -> intermediate row -> f(1) -> ... -> f(N) -> out row
  171. // The full chain is executed atomically.
  172. repeated RowFilter filters = 1;
  173. }
  174. // A RowFilter which sends each row to each of several component
  175. // RowFilters and interleaves the results.
  176. message Interleave {
  177. // The elements of "filters" all process a copy of the input row, and the
  178. // results are pooled, sorted, and combined into a single output row.
  179. // If multiple cells are produced with the same column and timestamp,
  180. // they will all appear in the output row in an unspecified mutual order.
  181. // Consider the following example, with three filters:
  182. //
  183. // input row
  184. // |
  185. // -----------------------------------------------------
  186. // | | |
  187. // f(0) f(1) f(2)
  188. // | | |
  189. // 1: foo,bar,10,x foo,bar,10,z far,bar,7,a
  190. // 2: foo,blah,11,z far,blah,5,x far,blah,5,x
  191. // | | |
  192. // -----------------------------------------------------
  193. // |
  194. // 1: foo,bar,10,z // could have switched with #2
  195. // 2: foo,bar,10,x // could have switched with #1
  196. // 3: foo,blah,11,z
  197. // 4: far,bar,7,a
  198. // 5: far,blah,5,x // identical to #6
  199. // 6: far,blah,5,x // identical to #5
  200. // All interleaved filters are executed atomically.
  201. repeated RowFilter filters = 1;
  202. }
  203. // A RowFilter which evaluates one of two possible RowFilters, depending on
  204. // whether or not a predicate RowFilter outputs any cells from the input row.
  205. //
  206. // IMPORTANT NOTE: The predicate filter does not execute atomically with the
  207. // true and false filters, which may lead to inconsistent or unexpected
  208. // results. Additionally, Condition filters have poor performance, especially
  209. // when filters are set for the false condition.
  210. message Condition {
  211. // If "predicate_filter" outputs any cells, then "true_filter" will be
  212. // evaluated on the input row. Otherwise, "false_filter" will be evaluated.
  213. RowFilter predicate_filter = 1;
  214. // The filter to apply to the input row if "predicate_filter" returns any
  215. // results. If not provided, no results will be returned in the true case.
  216. RowFilter true_filter = 2;
  217. // The filter to apply to the input row if "predicate_filter" does not
  218. // return any results. If not provided, no results will be returned in the
  219. // false case.
  220. RowFilter false_filter = 3;
  221. }
  222. // Which of the possible RowFilter types to apply. If none are set, this
  223. // RowFilter returns all cells in the input row.
  224. oneof filter {
  225. // Applies several RowFilters to the data in sequence, progressively
  226. // narrowing the results.
  227. Chain chain = 1;
  228. // Applies several RowFilters to the data in parallel and combines the
  229. // results.
  230. Interleave interleave = 2;
  231. // Applies one of two possible RowFilters to the data based on the output of
  232. // a predicate RowFilter.
  233. Condition condition = 3;
  234. // ADVANCED USE ONLY.
  235. // Hook for introspection into the RowFilter. Outputs all cells directly to
  236. // the output of the read rather than to any parent filter. Consider the
  237. // following example:
  238. //
  239. // Chain(
  240. // FamilyRegex("A"),
  241. // Interleave(
  242. // All(),
  243. // Chain(Label("foo"), Sink())
  244. // ),
  245. // QualifierRegex("B")
  246. // )
  247. //
  248. // A,A,1,w
  249. // A,B,2,x
  250. // B,B,4,z
  251. // |
  252. // FamilyRegex("A")
  253. // |
  254. // A,A,1,w
  255. // A,B,2,x
  256. // |
  257. // +------------+-------------+
  258. // | |
  259. // All() Label(foo)
  260. // | |
  261. // A,A,1,w A,A,1,w,labels:[foo]
  262. // A,B,2,x A,B,2,x,labels:[foo]
  263. // | |
  264. // | Sink() --------------+
  265. // | | |
  266. // +------------+ x------+ A,A,1,w,labels:[foo]
  267. // | A,B,2,x,labels:[foo]
  268. // A,A,1,w |
  269. // A,B,2,x |
  270. // | |
  271. // QualifierRegex("B") |
  272. // | |
  273. // A,B,2,x |
  274. // | |
  275. // +--------------------------------+
  276. // |
  277. // A,A,1,w,labels:[foo]
  278. // A,B,2,x,labels:[foo] // could be switched
  279. // A,B,2,x // could be switched
  280. //
  281. // Despite being excluded by the qualifier filter, a copy of every cell
  282. // that reaches the sink is present in the final result.
  283. //
  284. // As with an [Interleave][google.bigtable.v1.RowFilter.Interleave],
  285. // duplicate cells are possible, and appear in an unspecified mutual order.
  286. // In this case we have a duplicate with column "A:B" and timestamp 2,
  287. // because one copy passed through the all filter while the other was
  288. // passed through the label and sink. Note that one copy has label "foo",
  289. // while the other does not.
  290. //
  291. // Cannot be used within the `predicate_filter`, `true_filter`, or
  292. // `false_filter` of a [Condition][google.bigtable.v1.RowFilter.Condition].
  293. bool sink = 16;
  294. // Matches all cells, regardless of input. Functionally equivalent to
  295. // leaving `filter` unset, but included for completeness.
  296. bool pass_all_filter = 17;
  297. // Does not match any cells, regardless of input. Useful for temporarily
  298. // disabling just part of a filter.
  299. bool block_all_filter = 18;
  300. // Matches only cells from rows whose keys satisfy the given RE2 regex. In
  301. // other words, passes through the entire row when the key matches, and
  302. // otherwise produces an empty row.
  303. // Note that, since row keys can contain arbitrary bytes, the '\C' escape
  304. // sequence must be used if a true wildcard is desired. The '.' character
  305. // will not match the new line character '\n', which may be present in a
  306. // binary key.
  307. bytes row_key_regex_filter = 4;
  308. // Matches all cells from a row with probability p, and matches no cells
  309. // from the row with probability 1-p.
  310. double row_sample_filter = 14;
  311. // Matches only cells from columns whose families satisfy the given RE2
  312. // regex. For technical reasons, the regex must not contain the ':'
  313. // character, even if it is not being used as a literal.
  314. // Note that, since column families cannot contain the new line character
  315. // '\n', it is sufficient to use '.' as a full wildcard when matching
  316. // column family names.
  317. string family_name_regex_filter = 5;
  318. // Matches only cells from columns whose qualifiers satisfy the given RE2
  319. // regex.
  320. // Note that, since column qualifiers can contain arbitrary bytes, the '\C'
  321. // escape sequence must be used if a true wildcard is desired. The '.'
  322. // character will not match the new line character '\n', which may be
  323. // present in a binary qualifier.
  324. bytes column_qualifier_regex_filter = 6;
  325. // Matches only cells from columns within the given range.
  326. ColumnRange column_range_filter = 7;
  327. // Matches only cells with timestamps within the given range.
  328. TimestampRange timestamp_range_filter = 8;
  329. // Matches only cells with values that satisfy the given regular expression.
  330. // Note that, since cell values can contain arbitrary bytes, the '\C' escape
  331. // sequence must be used if a true wildcard is desired. The '.' character
  332. // will not match the new line character '\n', which may be present in a
  333. // binary value.
  334. bytes value_regex_filter = 9;
  335. // Matches only cells with values that fall within the given range.
  336. ValueRange value_range_filter = 15;
  337. // Skips the first N cells of each row, matching all subsequent cells.
  338. // If duplicate cells are present, as is possible when using an Interleave,
  339. // each copy of the cell is counted separately.
  340. int32 cells_per_row_offset_filter = 10;
  341. // Matches only the first N cells of each row.
  342. // If duplicate cells are present, as is possible when using an Interleave,
  343. // each copy of the cell is counted separately.
  344. int32 cells_per_row_limit_filter = 11;
  345. // Matches only the most recent N cells within each column. For example,
  346. // if N=2, this filter would match column "foo:bar" at timestamps 10 and 9,
  347. // skip all earlier cells in "foo:bar", and then begin matching again in
  348. // column "foo:bar2".
  349. // If duplicate cells are present, as is possible when using an Interleave,
  350. // each copy of the cell is counted separately.
  351. int32 cells_per_column_limit_filter = 12;
  352. // Replaces each cell's value with the empty string.
  353. bool strip_value_transformer = 13;
  354. // Applies the given label to all cells in the output row. This allows
  355. // the client to determine which results were produced from which part of
  356. // the filter.
  357. //
  358. // Values must be at most 15 characters in length, and match the RE2
  359. // pattern [a-z0-9\\-]+
  360. //
  361. // Due to a technical limitation, it is not currently possible to apply
  362. // multiple labels to a cell. As a result, a Chain may have no more than
  363. // one sub-filter which contains a apply_label_transformer. It is okay for
  364. // an Interleave to contain multiple apply_label_transformers, as they will
  365. // be applied to separate copies of the input. This may be relaxed in the
  366. // future.
  367. string apply_label_transformer = 19;
  368. }
  369. }
  370. // Specifies a particular change to be made to the contents of a row.
  371. message Mutation {
  372. // A Mutation which sets the value of the specified cell.
  373. message SetCell {
  374. // The name of the family into which new data should be written.
  375. // Must match [-_.a-zA-Z0-9]+
  376. string family_name = 1;
  377. // The qualifier of the column into which new data should be written.
  378. // Can be any byte string, including the empty string.
  379. bytes column_qualifier = 2;
  380. // The timestamp of the cell into which new data should be written.
  381. // Use -1 for current Bigtable server time.
  382. // Otherwise, the client should set this value itself, noting that the
  383. // default value is a timestamp of zero if the field is left unspecified.
  384. // Values must match the "granularity" of the table (e.g. micros, millis).
  385. int64 timestamp_micros = 3;
  386. // The value to be written into the specified cell.
  387. bytes value = 4;
  388. }
  389. // A Mutation which deletes cells from the specified column, optionally
  390. // restricting the deletions to a given timestamp range.
  391. message DeleteFromColumn {
  392. // The name of the family from which cells should be deleted.
  393. // Must match [-_.a-zA-Z0-9]+
  394. string family_name = 1;
  395. // The qualifier of the column from which cells should be deleted.
  396. // Can be any byte string, including the empty string.
  397. bytes column_qualifier = 2;
  398. // The range of timestamps within which cells should be deleted.
  399. TimestampRange time_range = 3;
  400. }
  401. // A Mutation which deletes all cells from the specified column family.
  402. message DeleteFromFamily {
  403. // The name of the family from which cells should be deleted.
  404. // Must match [-_.a-zA-Z0-9]+
  405. string family_name = 1;
  406. }
  407. // A Mutation which deletes all cells from the containing row.
  408. message DeleteFromRow {
  409. }
  410. // Which of the possible Mutation types to apply.
  411. oneof mutation {
  412. // Set a cell's value.
  413. SetCell set_cell = 1;
  414. // Deletes cells from a column.
  415. DeleteFromColumn delete_from_column = 2;
  416. // Deletes cells from a column family.
  417. DeleteFromFamily delete_from_family = 3;
  418. // Deletes cells from the entire row.
  419. DeleteFromRow delete_from_row = 4;
  420. }
  421. }
  422. // Specifies an atomic read/modify/write operation on the latest value of the
  423. // specified column.
  424. message ReadModifyWriteRule {
  425. // The name of the family to which the read/modify/write should be applied.
  426. // Must match [-_.a-zA-Z0-9]+
  427. string family_name = 1;
  428. // The qualifier of the column to which the read/modify/write should be
  429. // applied.
  430. // Can be any byte string, including the empty string.
  431. bytes column_qualifier = 2;
  432. // The rule used to determine the column's new latest value from its current
  433. // latest value.
  434. oneof rule {
  435. // Rule specifying that "append_value" be appended to the existing value.
  436. // If the targeted cell is unset, it will be treated as containing the
  437. // empty string.
  438. bytes append_value = 3;
  439. // Rule specifying that "increment_amount" be added to the existing value.
  440. // If the targeted cell is unset, it will be treated as containing a zero.
  441. // Otherwise, the targeted cell must contain an 8-byte value (interpreted
  442. // as a 64-bit big-endian signed integer), or the entire request will fail.
  443. int64 increment_amount = 4;
  444. }
  445. }