table.int.h 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507
  1. /*
  2. ** upb_table
  3. **
  4. ** This header is INTERNAL-ONLY! Its interfaces are not public or stable!
  5. ** This file defines very fast int->upb_value (inttable) and string->upb_value
  6. ** (strtable) hash tables.
  7. **
  8. ** The table uses chained scatter with Brent's variation (inspired by the Lua
  9. ** implementation of hash tables). The hash function for strings is Austin
  10. ** Appleby's "MurmurHash."
  11. **
  12. ** The inttable uses uintptr_t as its key, which guarantees it can be used to
  13. ** store pointers or integers of at least 32 bits (upb isn't really useful on
  14. ** systems where sizeof(void*) < 4).
  15. **
  16. ** The table must be homogenous (all values of the same type). In debug
  17. ** mode, we check this on insert and lookup.
  18. */
  19. #ifndef UPB_TABLE_H_
  20. #define UPB_TABLE_H_
  21. #include <stdint.h>
  22. #include <string.h>
  23. #include "upb/upb.h"
  24. #include "upb/port_def.inc"
  25. #ifdef __cplusplus
  26. extern "C" {
  27. #endif
  28. /* upb_value ******************************************************************/
  29. /* A tagged union (stored untagged inside the table) so that we can check that
  30. * clients calling table accessors are correctly typed without having to have
  31. * an explosion of accessors. */
  32. typedef enum {
  33. UPB_CTYPE_INT32 = 1,
  34. UPB_CTYPE_INT64 = 2,
  35. UPB_CTYPE_UINT32 = 3,
  36. UPB_CTYPE_UINT64 = 4,
  37. UPB_CTYPE_BOOL = 5,
  38. UPB_CTYPE_CSTR = 6,
  39. UPB_CTYPE_PTR = 7,
  40. UPB_CTYPE_CONSTPTR = 8,
  41. UPB_CTYPE_FPTR = 9,
  42. UPB_CTYPE_FLOAT = 10,
  43. UPB_CTYPE_DOUBLE = 11
  44. } upb_ctype_t;
  45. typedef struct {
  46. uint64_t val;
  47. #ifndef NDEBUG
  48. /* In debug mode we carry the value type around also so we can check accesses
  49. * to be sure the right member is being read. */
  50. upb_ctype_t ctype;
  51. #endif
  52. } upb_value;
  53. #ifdef NDEBUG
  54. #define SET_TYPE(dest, val) UPB_UNUSED(val)
  55. #else
  56. #define SET_TYPE(dest, val) dest = val
  57. #endif
  58. /* Like strdup(), which isn't always available since it's not ANSI C. */
  59. char *upb_strdup(const char *s, upb_alloc *a);
  60. /* Variant that works with a length-delimited rather than NULL-delimited string,
  61. * as supported by strtable. */
  62. char *upb_strdup2(const char *s, size_t len, upb_alloc *a);
  63. UPB_INLINE char *upb_gstrdup(const char *s) {
  64. return upb_strdup(s, &upb_alloc_global);
  65. }
  66. UPB_INLINE void _upb_value_setval(upb_value *v, uint64_t val,
  67. upb_ctype_t ctype) {
  68. v->val = val;
  69. SET_TYPE(v->ctype, ctype);
  70. }
  71. UPB_INLINE upb_value _upb_value_val(uint64_t val, upb_ctype_t ctype) {
  72. upb_value ret;
  73. _upb_value_setval(&ret, val, ctype);
  74. return ret;
  75. }
  76. /* For each value ctype, define the following set of functions:
  77. *
  78. * // Get/set an int32 from a upb_value.
  79. * int32_t upb_value_getint32(upb_value val);
  80. * void upb_value_setint32(upb_value *val, int32_t cval);
  81. *
  82. * // Construct a new upb_value from an int32.
  83. * upb_value upb_value_int32(int32_t val); */
  84. #define FUNCS(name, membername, type_t, converter, proto_type) \
  85. UPB_INLINE void upb_value_set ## name(upb_value *val, type_t cval) { \
  86. val->val = (converter)cval; \
  87. SET_TYPE(val->ctype, proto_type); \
  88. } \
  89. UPB_INLINE upb_value upb_value_ ## name(type_t val) { \
  90. upb_value ret; \
  91. upb_value_set ## name(&ret, val); \
  92. return ret; \
  93. } \
  94. UPB_INLINE type_t upb_value_get ## name(upb_value val) { \
  95. UPB_ASSERT_DEBUGVAR(val.ctype == proto_type); \
  96. return (type_t)(converter)val.val; \
  97. }
  98. FUNCS(int32, int32, int32_t, int32_t, UPB_CTYPE_INT32)
  99. FUNCS(int64, int64, int64_t, int64_t, UPB_CTYPE_INT64)
  100. FUNCS(uint32, uint32, uint32_t, uint32_t, UPB_CTYPE_UINT32)
  101. FUNCS(uint64, uint64, uint64_t, uint64_t, UPB_CTYPE_UINT64)
  102. FUNCS(bool, _bool, bool, bool, UPB_CTYPE_BOOL)
  103. FUNCS(cstr, cstr, char*, uintptr_t, UPB_CTYPE_CSTR)
  104. FUNCS(ptr, ptr, void*, uintptr_t, UPB_CTYPE_PTR)
  105. FUNCS(constptr, constptr, const void*, uintptr_t, UPB_CTYPE_CONSTPTR)
  106. FUNCS(fptr, fptr, upb_func*, uintptr_t, UPB_CTYPE_FPTR)
  107. #undef FUNCS
  108. UPB_INLINE void upb_value_setfloat(upb_value *val, float cval) {
  109. memcpy(&val->val, &cval, sizeof(cval));
  110. SET_TYPE(val->ctype, UPB_CTYPE_FLOAT);
  111. }
  112. UPB_INLINE void upb_value_setdouble(upb_value *val, double cval) {
  113. memcpy(&val->val, &cval, sizeof(cval));
  114. SET_TYPE(val->ctype, UPB_CTYPE_DOUBLE);
  115. }
  116. UPB_INLINE upb_value upb_value_float(float cval) {
  117. upb_value ret;
  118. upb_value_setfloat(&ret, cval);
  119. return ret;
  120. }
  121. UPB_INLINE upb_value upb_value_double(double cval) {
  122. upb_value ret;
  123. upb_value_setdouble(&ret, cval);
  124. return ret;
  125. }
  126. #undef SET_TYPE
  127. /* upb_tabkey *****************************************************************/
  128. /* Either:
  129. * 1. an actual integer key, or
  130. * 2. a pointer to a string prefixed by its uint32_t length, owned by us.
  131. *
  132. * ...depending on whether this is a string table or an int table. We would
  133. * make this a union of those two types, but C89 doesn't support statically
  134. * initializing a non-first union member. */
  135. typedef uintptr_t upb_tabkey;
  136. UPB_INLINE char *upb_tabstr(upb_tabkey key, uint32_t *len) {
  137. char* mem = (char*)key;
  138. if (len) memcpy(len, mem, sizeof(*len));
  139. return mem + sizeof(*len);
  140. }
  141. /* upb_tabval *****************************************************************/
  142. typedef struct {
  143. uint64_t val;
  144. } upb_tabval;
  145. #define UPB_TABVALUE_EMPTY_INIT {-1}
  146. /* upb_table ******************************************************************/
  147. typedef struct _upb_tabent {
  148. upb_tabkey key;
  149. upb_tabval val;
  150. /* Internal chaining. This is const so we can create static initializers for
  151. * tables. We cast away const sometimes, but *only* when the containing
  152. * upb_table is known to be non-const. This requires a bit of care, but
  153. * the subtlety is confined to table.c. */
  154. const struct _upb_tabent *next;
  155. } upb_tabent;
  156. typedef struct {
  157. size_t count; /* Number of entries in the hash part. */
  158. size_t mask; /* Mask to turn hash value -> bucket. */
  159. upb_ctype_t ctype; /* Type of all values. */
  160. uint8_t size_lg2; /* Size of the hashtable part is 2^size_lg2 entries. */
  161. /* Hash table entries.
  162. * Making this const isn't entirely accurate; what we really want is for it to
  163. * have the same const-ness as the table it's inside. But there's no way to
  164. * declare that in C. So we have to make it const so that we can statically
  165. * initialize const hash tables. Then we cast away const when we have to.
  166. */
  167. const upb_tabent *entries;
  168. #ifndef NDEBUG
  169. /* This table's allocator. We make the user pass it in to every relevant
  170. * function and only use this to check it in debug mode. We do this solely
  171. * to keep upb_table as small as possible. This might seem slightly paranoid
  172. * but the plan is to use upb_table for all map fields and extension sets in
  173. * a forthcoming message representation, so there could be a lot of these.
  174. * If this turns out to be too annoying later, we can change it (since this
  175. * is an internal-only header file). */
  176. upb_alloc *alloc;
  177. #endif
  178. } upb_table;
  179. typedef struct {
  180. upb_table t;
  181. } upb_strtable;
  182. typedef struct {
  183. upb_table t; /* For entries that don't fit in the array part. */
  184. const upb_tabval *array; /* Array part of the table. See const note above. */
  185. size_t array_size; /* Array part size. */
  186. size_t array_count; /* Array part number of elements. */
  187. } upb_inttable;
  188. #define UPB_INTTABLE_INIT(count, mask, ctype, size_lg2, ent, a, asize, acount) \
  189. {UPB_TABLE_INIT(count, mask, ctype, size_lg2, ent), a, asize, acount}
  190. #define UPB_EMPTY_INTTABLE_INIT(ctype) \
  191. UPB_INTTABLE_INIT(0, 0, ctype, 0, NULL, NULL, 0, 0)
  192. #define UPB_ARRAY_EMPTYENT -1
  193. UPB_INLINE size_t upb_table_size(const upb_table *t) {
  194. if (t->size_lg2 == 0)
  195. return 0;
  196. else
  197. return 1 << t->size_lg2;
  198. }
  199. /* Internal-only functions, in .h file only out of necessity. */
  200. UPB_INLINE bool upb_tabent_isempty(const upb_tabent *e) {
  201. return e->key == 0;
  202. }
  203. /* Used by some of the unit tests for generic hashing functionality. */
  204. uint32_t upb_murmur_hash2(const void * key, size_t len, uint32_t seed);
  205. UPB_INLINE uintptr_t upb_intkey(uintptr_t key) {
  206. return key;
  207. }
  208. UPB_INLINE uint32_t upb_inthash(uintptr_t key) {
  209. return (uint32_t)key;
  210. }
  211. static const upb_tabent *upb_getentry(const upb_table *t, uint32_t hash) {
  212. return t->entries + (hash & t->mask);
  213. }
  214. UPB_INLINE bool upb_arrhas(upb_tabval key) {
  215. return key.val != (uint64_t)-1;
  216. }
  217. /* Initialize and uninitialize a table, respectively. If memory allocation
  218. * failed, false is returned that the table is uninitialized. */
  219. bool upb_inttable_init2(upb_inttable *table, upb_ctype_t ctype, upb_alloc *a);
  220. bool upb_strtable_init2(upb_strtable *table, upb_ctype_t ctype, upb_alloc *a);
  221. void upb_inttable_uninit2(upb_inttable *table, upb_alloc *a);
  222. void upb_strtable_uninit2(upb_strtable *table, upb_alloc *a);
  223. UPB_INLINE bool upb_inttable_init(upb_inttable *table, upb_ctype_t ctype) {
  224. return upb_inttable_init2(table, ctype, &upb_alloc_global);
  225. }
  226. UPB_INLINE bool upb_strtable_init(upb_strtable *table, upb_ctype_t ctype) {
  227. return upb_strtable_init2(table, ctype, &upb_alloc_global);
  228. }
  229. UPB_INLINE void upb_inttable_uninit(upb_inttable *table) {
  230. upb_inttable_uninit2(table, &upb_alloc_global);
  231. }
  232. UPB_INLINE void upb_strtable_uninit(upb_strtable *table) {
  233. upb_strtable_uninit2(table, &upb_alloc_global);
  234. }
  235. /* Returns the number of values in the table. */
  236. size_t upb_inttable_count(const upb_inttable *t);
  237. UPB_INLINE size_t upb_strtable_count(const upb_strtable *t) {
  238. return t->t.count;
  239. }
  240. void upb_inttable_packedsize(const upb_inttable *t, size_t *size);
  241. void upb_strtable_packedsize(const upb_strtable *t, size_t *size);
  242. upb_inttable *upb_inttable_pack(const upb_inttable *t, void *p, size_t *ofs,
  243. size_t size);
  244. upb_strtable *upb_strtable_pack(const upb_strtable *t, void *p, size_t *ofs,
  245. size_t size);
  246. /* Inserts the given key into the hashtable with the given value. The key must
  247. * not already exist in the hash table. For string tables, the key must be
  248. * NULL-terminated, and the table will make an internal copy of the key.
  249. * Inttables must not insert a value of UINTPTR_MAX.
  250. *
  251. * If a table resize was required but memory allocation failed, false is
  252. * returned and the table is unchanged. */
  253. bool upb_inttable_insert2(upb_inttable *t, uintptr_t key, upb_value val,
  254. upb_alloc *a);
  255. bool upb_strtable_insert3(upb_strtable *t, const char *key, size_t len,
  256. upb_value val, upb_alloc *a);
  257. UPB_INLINE bool upb_inttable_insert(upb_inttable *t, uintptr_t key,
  258. upb_value val) {
  259. return upb_inttable_insert2(t, key, val, &upb_alloc_global);
  260. }
  261. UPB_INLINE bool upb_strtable_insert2(upb_strtable *t, const char *key,
  262. size_t len, upb_value val) {
  263. return upb_strtable_insert3(t, key, len, val, &upb_alloc_global);
  264. }
  265. /* For NULL-terminated strings. */
  266. UPB_INLINE bool upb_strtable_insert(upb_strtable *t, const char *key,
  267. upb_value val) {
  268. return upb_strtable_insert2(t, key, strlen(key), val);
  269. }
  270. /* Looks up key in this table, returning "true" if the key was found.
  271. * If v is non-NULL, copies the value for this key into *v. */
  272. bool upb_inttable_lookup(const upb_inttable *t, uintptr_t key, upb_value *v);
  273. bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len,
  274. upb_value *v);
  275. /* For NULL-terminated strings. */
  276. UPB_INLINE bool upb_strtable_lookup(const upb_strtable *t, const char *key,
  277. upb_value *v) {
  278. return upb_strtable_lookup2(t, key, strlen(key), v);
  279. }
  280. /* Removes an item from the table. Returns true if the remove was successful,
  281. * and stores the removed item in *val if non-NULL. */
  282. bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val);
  283. bool upb_strtable_remove3(upb_strtable *t, const char *key, size_t len,
  284. upb_value *val, upb_alloc *alloc);
  285. UPB_INLINE bool upb_strtable_remove2(upb_strtable *t, const char *key,
  286. size_t len, upb_value *val) {
  287. return upb_strtable_remove3(t, key, len, val, &upb_alloc_global);
  288. }
  289. /* For NULL-terminated strings. */
  290. UPB_INLINE bool upb_strtable_remove(upb_strtable *t, const char *key,
  291. upb_value *v) {
  292. return upb_strtable_remove2(t, key, strlen(key), v);
  293. }
  294. /* Updates an existing entry in an inttable. If the entry does not exist,
  295. * returns false and does nothing. Unlike insert/remove, this does not
  296. * invalidate iterators. */
  297. bool upb_inttable_replace(upb_inttable *t, uintptr_t key, upb_value val);
  298. /* Handy routines for treating an inttable like a stack. May not be mixed with
  299. * other insert/remove calls. */
  300. bool upb_inttable_push2(upb_inttable *t, upb_value val, upb_alloc *a);
  301. upb_value upb_inttable_pop(upb_inttable *t);
  302. UPB_INLINE bool upb_inttable_push(upb_inttable *t, upb_value val) {
  303. return upb_inttable_push2(t, val, &upb_alloc_global);
  304. }
  305. /* Convenience routines for inttables with pointer keys. */
  306. bool upb_inttable_insertptr2(upb_inttable *t, const void *key, upb_value val,
  307. upb_alloc *a);
  308. bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val);
  309. bool upb_inttable_lookupptr(
  310. const upb_inttable *t, const void *key, upb_value *val);
  311. UPB_INLINE bool upb_inttable_insertptr(upb_inttable *t, const void *key,
  312. upb_value val) {
  313. return upb_inttable_insertptr2(t, key, val, &upb_alloc_global);
  314. }
  315. /* Optimizes the table for the current set of entries, for both memory use and
  316. * lookup time. Client should call this after all entries have been inserted;
  317. * inserting more entries is legal, but will likely require a table resize. */
  318. void upb_inttable_compact2(upb_inttable *t, upb_alloc *a);
  319. UPB_INLINE void upb_inttable_compact(upb_inttable *t) {
  320. upb_inttable_compact2(t, &upb_alloc_global);
  321. }
  322. /* A special-case inlinable version of the lookup routine for 32-bit
  323. * integers. */
  324. UPB_INLINE bool upb_inttable_lookup32(const upb_inttable *t, uint32_t key,
  325. upb_value *v) {
  326. *v = upb_value_int32(0); /* Silence compiler warnings. */
  327. if (key < t->array_size) {
  328. upb_tabval arrval = t->array[key];
  329. if (upb_arrhas(arrval)) {
  330. _upb_value_setval(v, arrval.val, t->t.ctype);
  331. return true;
  332. } else {
  333. return false;
  334. }
  335. } else {
  336. const upb_tabent *e;
  337. if (t->t.entries == NULL) return false;
  338. for (e = upb_getentry(&t->t, upb_inthash(key)); true; e = e->next) {
  339. if ((uint32_t)e->key == key) {
  340. _upb_value_setval(v, e->val.val, t->t.ctype);
  341. return true;
  342. }
  343. if (e->next == NULL) return false;
  344. }
  345. }
  346. }
  347. /* Exposed for testing only. */
  348. bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_alloc *a);
  349. /* Iterators ******************************************************************/
  350. /* Iterators for int and string tables. We are subject to some kind of unusual
  351. * design constraints:
  352. *
  353. * For high-level languages:
  354. * - we must be able to guarantee that we don't crash or corrupt memory even if
  355. * the program accesses an invalidated iterator.
  356. *
  357. * For C++11 range-based for:
  358. * - iterators must be copyable
  359. * - iterators must be comparable
  360. * - it must be possible to construct an "end" value.
  361. *
  362. * Iteration order is undefined.
  363. *
  364. * Modifying the table invalidates iterators. upb_{str,int}table_done() is
  365. * guaranteed to work even on an invalidated iterator, as long as the table it
  366. * is iterating over has not been freed. Calling next() or accessing data from
  367. * an invalidated iterator yields unspecified elements from the table, but it is
  368. * guaranteed not to crash and to return real table elements (except when done()
  369. * is true). */
  370. /* upb_strtable_iter **********************************************************/
  371. /* upb_strtable_iter i;
  372. * upb_strtable_begin(&i, t);
  373. * for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
  374. * const char *key = upb_strtable_iter_key(&i);
  375. * const upb_value val = upb_strtable_iter_value(&i);
  376. * // ...
  377. * }
  378. */
  379. typedef struct {
  380. const upb_strtable *t;
  381. size_t index;
  382. } upb_strtable_iter;
  383. void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t);
  384. void upb_strtable_next(upb_strtable_iter *i);
  385. bool upb_strtable_done(const upb_strtable_iter *i);
  386. const char *upb_strtable_iter_key(const upb_strtable_iter *i);
  387. size_t upb_strtable_iter_keylength(const upb_strtable_iter *i);
  388. upb_value upb_strtable_iter_value(const upb_strtable_iter *i);
  389. void upb_strtable_iter_setdone(upb_strtable_iter *i);
  390. bool upb_strtable_iter_isequal(const upb_strtable_iter *i1,
  391. const upb_strtable_iter *i2);
  392. /* upb_inttable_iter **********************************************************/
  393. /* upb_inttable_iter i;
  394. * upb_inttable_begin(&i, t);
  395. * for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
  396. * uintptr_t key = upb_inttable_iter_key(&i);
  397. * upb_value val = upb_inttable_iter_value(&i);
  398. * // ...
  399. * }
  400. */
  401. typedef struct {
  402. const upb_inttable *t;
  403. size_t index;
  404. bool array_part;
  405. } upb_inttable_iter;
  406. void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t);
  407. void upb_inttable_next(upb_inttable_iter *i);
  408. bool upb_inttable_done(const upb_inttable_iter *i);
  409. uintptr_t upb_inttable_iter_key(const upb_inttable_iter *i);
  410. upb_value upb_inttable_iter_value(const upb_inttable_iter *i);
  411. void upb_inttable_iter_setdone(upb_inttable_iter *i);
  412. bool upb_inttable_iter_isequal(const upb_inttable_iter *i1,
  413. const upb_inttable_iter *i2);
  414. #ifdef __cplusplus
  415. } /* extern "C" */
  416. #endif
  417. #include "upb/port_undef.inc"
  418. #endif /* UPB_TABLE_H_ */