cigar.proto 3.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
  1. // Copyright 2016 Google Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  // This schema is written against the proto3 language version.
  syntax = "proto3";

  package google.genomics.v1;

  // NOTE(review): no google.api annotation is used by the definitions visible
  // in this file -- confirm whether this import is still required.
  import "google/api/annotations.proto";

  // C++ code generation: enable arena allocation for the generated messages.
  option cc_enable_arenas = true;

  // Java code generation: emit one .java file per top-level definition, use
  // `CigarProto` as the outer wrapper class, and place the generated classes
  // in the `com.google.genomics.v1` package.
  option java_multiple_files = true;
  option java_outer_classname = "CigarProto";
  option java_package = "com.google.genomics.v1";
  // One unit of a CIGAR alignment: a single operation paired with the number
  // of bases it runs for.
  message CigarUnit {
    // The kinds of CIGAR alignment operation that exist. Shared by every place
    // where CIGAR alignments are used.
    enum Operation {
      // Zero value; no operation has been specified.
      OPERATION_UNSPECIFIED = 0;

      // Equivalent to SAM's `M`. The sequence can be aligned to the reference
      // without evidence of an INDEL. Unlike `SEQUENCE_MATCH` and
      // `SEQUENCE_MISMATCH`, `ALIGNMENT_MATCH` does not say whether the
      // reference and read sequences are an exact match.
      ALIGNMENT_MATCH = 1;

      // Equivalent to SAM's `I`. The read contains evidence of bases being
      // inserted into the reference.
      INSERT = 2;

      // Equivalent to SAM's `D`. The read contains evidence of bases being
      // deleted from the reference.
      DELETE = 3;

      // Equivalent to SAM's `N`. The read skips a long segment of the
      // reference, but those bases have not been deleted. Commonly used with
      // RNA-seq data, where reads may skip long segments of the reference
      // between exons.
      SKIP = 4;

      // Equivalent to SAM's `S`. Bases at the start/end of the read have not
      // been considered during alignment; this may occur if the majority of a
      // read maps, except for low quality bases at the start/end of the read.
      // Soft clipped bases are still stored in the read.
      CLIP_SOFT = 5;

      // Equivalent to SAM's `H`. Bases at the start/end of the read have been
      // omitted from this alignment. This may occur if this linear alignment
      // is part of a chimeric alignment, or if the read has been trimmed (for
      // example, during error correction or to trim poly-A tails for RNA-seq).
      CLIP_HARD = 6;

      // Equivalent to SAM's `P`. There is padding in the alignment.
      PAD = 7;

      // Equivalent to SAM's `=`. This portion of the aligned sequence exactly
      // matches the reference.
      SEQUENCE_MATCH = 8;

      // Equivalent to SAM's `X`. This portion of the aligned sequence is an
      // alignment match to the reference, but a sequence mismatch. This can
      // indicate a SNP or a read error.
      SEQUENCE_MISMATCH = 9;
    }

    // The operation this unit carries.
    Operation operation = 1;

    // Required. The number of genomic bases that the operation runs for.
    int64 operation_length = 2;

    // `reference_sequence` (`referenceSequence` in JSON) is only used at
    // mismatches (`SEQUENCE_MISMATCH`) and deletions (`DELETE`). Filling this
    // field replaces SAM's MD tag. When the relevant information is not
    // available, this field is left unset.
    string reference_sequence = 3;
  }