FMDatabase+FTS3.m 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
  1. //
  2. // FMDatabase+FTS3.m
  3. // fmdb
  4. //
  5. // Created by Andrew on 3/27/14.
  6. // Copyright (c) 2014 Andrew Goodale. All rights reserved.
  7. //
  8. #import "FMDatabase+FTS3.h"
  9. #import "fts3_tokenizer.h"
  /*
  ** Command strings for FTS tables (see -issueCommand:forTable: in this file).
  ** The merge and automerge entries contain %u placeholders, so they are
  ** templates that have to be formatted before use.
  */
  NSString * const kFTSCommandOptimize       = @"optimize";
  NSString * const kFTSCommandRebuild        = @"rebuild";
  NSString * const kFTSCommandIntegrityCheck = @"integrity-check";
  NSString * const kFTSCommandMerge          = @"merge=%u,%u";
  NSString * const kFTSCommandAutoMerge      = @"automerge=%u";

  /*
  ** Registry of tokenizer delegates keyed by name. It is a file-level global
  ** because the C callbacks invoked by SQLite have no other way to map the
  ** name given to them onto a delegate implementation.
  */
  static NSMapTable *g_delegateMap = nil;
  /*
  ** Tokenizer instance handed to SQLite ("derived" from sqlite3_tokenizer).
  **
  ** The sqlite3_tokenizer member must stay first: the callbacks in this file
  ** cast between sqlite3_tokenizer* and FMDBTokenizer*.
  */
  typedef struct FMDBTokenizer
  {
      /* Base "class" seen by SQLite. */
      sqlite3_tokenizer base;

      /* Delegate doing the actual tokenizing. Not retained by this struct. */
      __unsafe_unretained id<FMTokenizerDelegate> delegate;
  } FMDBTokenizer;
  /*
  ** Create a new tokenizer instance.
  **
  ** argv[0] is the name under which a delegate was registered with
  ** +registerTokenizer:withName:. On success the new tokenizer is stored in
  ** *ppTokenizer and SQLITE_OK is returned. Returns SQLITE_ERROR when no name
  ** was supplied or no delegate is registered under it, and SQLITE_NOMEM when
  ** the allocation fails.
  */
  static int FMDBTokenizerCreate(int argc, const char * const *argv, sqlite3_tokenizer **ppTokenizer)
  {
      // Check that the name of the tokenizer is set in CREATE VIRTUAL TABLE
      NSCParameterAssert(argc > 0);

      // Assertions are normally compiled out of release builds, so argv[0] also
      // needs a runtime guard before it is dereferenced.
      if (argc < 1 || argv == NULL || argv[0] == NULL) {
          return SQLITE_ERROR;
      }

      // +stringWithUTF8String: yields nil for bytes that are not valid UTF-8.
      NSString *delegateName = [NSString stringWithUTF8String:argv[0]];
      id<FMTokenizerDelegate> delegate = nil;

      if (delegateName != nil) {
          delegate = [g_delegateMap objectForKey:delegateName];
      }

      // Resolve the delegate before allocating, so a failed lookup cannot leak
      // the tokenizer memory.
      if (delegate == nil) {
          return SQLITE_ERROR;
      }

      FMDBTokenizer *tokenizer = (FMDBTokenizer *) sqlite3_malloc(sizeof(FMDBTokenizer));

      if (tokenizer == NULL) {
          return SQLITE_NOMEM;
      }

      memset(tokenizer, 0, sizeof(*tokenizer));
      tokenizer->delegate = delegate;

      *ppTokenizer = &tokenizer->base;
      return SQLITE_OK;
  }
  /*
  ** Destroy a tokenizer.
  **
  ** Hands the tokenizer's memory back to SQLite's allocator and always
  ** reports SQLITE_OK. The delegate reference is unretained, so there is
  ** nothing to release for it.
  */
  static int FMDBTokenizerDestroy(sqlite3_tokenizer *pTokenizer)
  {
      // The base struct is the first member, so this is the allocation itself.
      FMDBTokenizer *tokenizer = (FMDBTokenizer *)pTokenizer;

      sqlite3_free(tokenizer);
      return SQLITE_OK;
  }
  /*
  ** Prepare to begin tokenizing a particular string. The input
  ** string to be tokenized is pInput[0..nBytes-1]; a negative nBytes means
  ** the input is NUL-terminated. A cursor used to incrementally tokenize
  ** this string is returned in *ppCursor.
  **
  ** Returns SQLITE_OK on success, SQLITE_NOMEM when the cursor cannot be
  ** allocated, and SQLITE_ERROR when the input cannot be wrapped in a
  ** CFString (for example because it is not valid UTF-8).
  */
  static int FMDBTokenizerOpen(sqlite3_tokenizer *pTokenizer,         /* The tokenizer */
                               const char *pInput, int nBytes,        /* String to be tokenized */
                               sqlite3_tokenizer_cursor **ppCursor)   /* OUT: Tokenization cursor */
  {
      FMDBTokenizer *tokenizer = (FMDBTokenizer *)pTokenizer;
      FMTokenizerCursor *cursor = (FMTokenizerCursor *)sqlite3_malloc(sizeof(FMTokenizerCursor));

      if (cursor == NULL) {
          return SQLITE_NOMEM;
      }

      // sqlite3_malloc() does not clear memory; zero everything so no field
      // (including the output buffer) is ever read uninitialized.
      memset(cursor, 0, sizeof(*cursor));

      // Close() and Next() read this back-pointer. Set it before calling the
      // delegate so the callback never sees garbage in it. (SQLite presumably
      // stores the same value after xOpen returns -- confirm against FTS3.)
      cursor->tokenizer = pTokenizer;

      if (pInput == NULL || pInput[0] == '\0') {
          cursor->inputString = CFRetain(CFSTR(""));
      } else {
          nBytes = (nBytes < 0) ? (int) strlen(pInput) : nBytes;
          // No-copy with kCFAllocatorNull: the string is asked to reference
          // SQLite's buffer directly and will not try to free it.
          cursor->inputString = CFStringCreateWithBytesNoCopy(NULL, (const UInt8 *)pInput, nBytes,
                                                              kCFStringEncodingUTF8, false, kCFAllocatorNull);
      }

      // Creation fails (NULL) when the bytes cannot be decoded. Bail out here
      // instead of crashing later in CFStringGetBytes()/CFRelease().
      if (cursor->inputString == NULL) {
          sqlite3_free(cursor);
          return SQLITE_ERROR;
      }

      cursor->currentRange = CFRangeMake(0, 0);
      cursor->tokenIndex = 0;
      cursor->tokenString = NULL;
      cursor->userObject = NULL;
      cursor->outputBuf[0] = '\0';

      // Give the delegate a chance to attach its own state to the cursor.
      [tokenizer->delegate openTokenizerCursor:cursor];

      *ppCursor = (sqlite3_tokenizer_cursor *)cursor;
      return SQLITE_OK;
  }
  /*
  ** Close a tokenization cursor previously opened by a call to
  ** FMDBTokenizerOpen() above.
  **
  ** The delegate is notified first, while the cursor is still intact. Then
  ** the Core Foundation objects held by the cursor are released and the
  ** cursor memory is returned to SQLite. Always returns SQLITE_OK.
  */
  static int FMDBTokenizerClose(sqlite3_tokenizer_cursor *pCursor)
  {
      FMTokenizerCursor *cursor = (FMTokenizerCursor *)pCursor;
      FMDBTokenizer *tokenizer = (FMDBTokenizer *)cursor->tokenizer;

      [tokenizer->delegate closeTokenizerCursor:cursor];

      // CFRelease(NULL) crashes, so every reference is checked before release.
      if (cursor->userObject) {
          CFRelease(cursor->userObject);
      }
      if (cursor->tokenString) {
          CFRelease(cursor->tokenString);
      }
      // May be NULL if the input could not be decoded when the cursor was opened.
      if (cursor->inputString) {
          CFRelease(cursor->inputString);
      }

      sqlite3_free(cursor);
      return SQLITE_OK;
  }
  /*
  ** Extract the next token from a tokenization cursor. The cursor must
  ** have been opened by a prior call to FMDBTokenizerOpen().
  **
  ** The delegate advances the cursor: it updates currentRange (in UTF-16
  ** units of inputString) and tokenString (the text to index). Those values
  ** are translated here into the UTF-8 byte offsets that SQLite expects.
  **
  ** Returns SQLITE_OK when a token was produced, SQLITE_DONE when the
  ** delegate reports that the input is exhausted, and SQLITE_ERROR when the
  ** delegate left the cursor in an unusable state.
  */
  static int FMDBTokenizerNext(sqlite3_tokenizer_cursor *pCursor,  /* Cursor returned by Open */
                               const char **pzToken,               /* OUT: *pzToken is the token text */
                               int *pnBytes,                       /* OUT: Number of bytes in token */
                               int *piStartOffset,                 /* OUT: Starting offset of token */
                               int *piEndOffset,                   /* OUT: Ending offset of token */
                               int *piPosition)                    /* OUT: Position integer of token */
  {
      FMTokenizerCursor *cursor = (FMTokenizerCursor *)pCursor;
      FMDBTokenizer *tokenizer = (FMDBTokenizer *)cursor->tokenizer;

      // The delegate answers YES once there are no more tokens.
      if ([tokenizer->delegate nextTokenForCursor:cursor]) {
          return SQLITE_DONE;
      }

      // Without both strings there is nothing that can be handed to SQLite.
      if (cursor->inputString == NULL || cursor->tokenString == NULL) {
          return SQLITE_ERROR;
      }

      // Reject ranges outside the input; CFStringGetBytes() does not tolerate them.
      const CFRange tokenRange = cursor->currentRange;
      const CFIndex inputLength = CFStringGetLength(cursor->inputString);

      if (tokenRange.location < 0 || tokenRange.length < 0 ||
          tokenRange.location > inputLength - tokenRange.length) {
          return SQLITE_ERROR;
      }

      // The range from the tokenizer is in UTF-16 positions, we need to give UTF-8 positions to SQLite.
      CFIndex prefixBytes = 0;   // UTF-8 bytes before the start of the token
      CFIndex sourceBytes = 0;   // UTF-8 bytes the token occupies in the input
      CFIndex tokenBytes = 0;    // UTF-8 bytes of the (possibly normalized) token text

      // A NULL buffer makes CFStringGetBytes() only count the bytes.
      CFStringGetBytes(cursor->inputString, CFRangeMake(0, tokenRange.location), kCFStringEncodingUTF8, '?', false,
                       NULL, 0, &prefixBytes);
      CFStringGetBytes(cursor->inputString, tokenRange, kCFStringEncodingUTF8, '?', false,
                       NULL, 0, &sourceBytes);

      // Convert the whole token string. Its length can differ from the source
      // range when the delegate normalizes the text (case folding, stemming).
      // Tokens longer than outputBuf are cut off at the buffer size.
      const CFRange wholeToken = CFRangeMake(0, CFStringGetLength(cursor->tokenString));
      CFStringGetBytes(cursor->tokenString, wholeToken, kCFStringEncodingUTF8, '?', false,
                       cursor->outputBuf, sizeof(cursor->outputBuf), &tokenBytes);

      *pzToken = (char *) cursor->outputBuf;
      *pnBytes = (int) tokenBytes;
      // Offsets describe the token's span in the original input, so the end is
      // derived from the source range rather than from the normalized token.
      *piStartOffset = (int) prefixBytes;
      *piEndOffset = (int) (prefixBytes + sourceBytes);
      *piPosition = cursor->tokenIndex++;

      return SQLITE_OK;
  }
  /*
  ** The set of routines that bridge to the tokenizer delegate.
  ** This is the module table whose address is registered with SQLite by
  ** -installTokenizerModule.
  */
  static const sqlite3_tokenizer_module FMDBTokenizerModule =
  {
      .iVersion = 0,
      .xCreate  = FMDBTokenizerCreate,
      .xDestroy = FMDBTokenizerDestroy,
      .xOpen    = FMDBTokenizerOpen,
      .xClose   = FMDBTokenizerClose,
      .xNext    = FMDBTokenizerNext
  };
  145. #pragma mark
  @implementation FMDatabase (FTS3)

  /*
  ** Register a delegate under a name so that FMDBTokenizerCreate() can find
  ** it later. The registry holds its values weakly, so the caller has to
  ** keep the delegate alive for as long as it is needed.
  */
  + (void)registerTokenizer:(id<FMTokenizerDelegate>)tokenizer withName:(NSString *)name
  {
      NSParameterAssert(tokenizer);
      NSParameterAssert([name length]);

      // Create the shared registry exactly once: copied keys, weak values.
      static dispatch_once_t registryToken;
      dispatch_once(&registryToken, ^{
          g_delegateMap = [NSMapTable mapTableWithKeyOptions:NSPointerFunctionsCopyIn
                                                valueOptions:NSPointerFunctionsWeakMemory];
      });

      [g_delegateMap setObject:tokenizer forKey:name];
  }

  /*
  ** Register the bridging module with this connection under the SQL name
  ** 'fmdb'. Returns YES when the registering query produced a row, NO otherwise.
  */
  - (BOOL)installTokenizerModule
  {
      // fts3_tokenizer() takes the *address* of the module as a blob, so the
      // data wraps the pointer itself rather than the struct it points to.
      const sqlite3_tokenizer_module *modulePointer = &FMDBTokenizerModule;
      NSData *pointerBlob = [NSData dataWithBytes:&modulePointer length:sizeof(modulePointer)];

      FMResultSet *resultSet = [self executeQuery:@"SELECT fts3_tokenizer('fmdb', ?)", pointerBlob];

      if (![resultSet next]) {
          return NO;
      }

      [resultSet close];
      return YES;
  }

  /*
  ** Send a command to an FTS table by inserting the command text into the
  ** column that carries the table's own name. Returns the result of the
  ** update. The table name is spliced into the SQL text as-is.
  */
  - (BOOL)issueCommand:(NSString *)command forTable:(NSString *)tableName
  {
      NSString *statement = [NSString stringWithFormat:@"INSERT INTO %1$@(%1$@) VALUES (?)", tableName];

      return [self executeUpdate:statement, command];
  }

  @end
  175. #pragma mark
  /*
  ** Wrapper around the text produced by the FTS offsets() function: groups of
  ** four space-separated unsigned integers.
  */
  @implementation FMTextOffsets
  {
      NSString *_rawOffsets;   // Copy of the offsets text; never nil after init.
  }

  /*
  ** Initialize with the raw C string read from the database. A NULL pointer
  ** or text that is not valid UTF-8 yields an object with no offsets.
  */
  - (instancetype)initWithDBOffsets:(const char *)rawOffsets
  {
      if ((self = [super init])) {
          // +stringWithUTF8String: raises on NULL and returns nil for invalid
          // UTF-8; both cases fall back to an empty string.
          NSString *offsets = nil;

          if (rawOffsets != NULL) {
              offsets = [NSString stringWithUTF8String:rawOffsets];
          }

          _rawOffsets = (offsets != nil) ? offsets : @"";
      }
      return self;
  }

  /*
  ** Call `block` once for each complete group of four integers. The first two
  ** values are passed through as-is; the last two are passed as the location
  ** and length of an NSRange. Parsing stops at the first incomplete or
  ** malformed group. A nil block is ignored.
  */
  - (void)enumerateWithBlock:(void (^)(NSInteger, NSInteger, NSRange))block
  {
      if (block == nil) {
          return;
      }

      const char *remaining = [_rawOffsets UTF8String];

      if (remaining == NULL) {
          return;
      }

      uint32_t fields[4];
      int consumed = 0;

      // %n records how many characters the group used, so the scan position
      // can be advanced to the next group.
      while (sscanf(remaining, "%u %u %u %u%n",
                    &fields[0], &fields[1], &fields[2], &fields[3], &consumed) == 4) {
          block(fields[0], fields[1], NSMakeRange(fields[2], fields[3]));
          remaining += consumed;
      }
  }

  @end
  @implementation FMResultSet (FTS3)

  /*
  ** Wrap the text of the given column, expected to hold the result of the
  ** FTS offsets() function, in an FMTextOffsets object. A column without
  ** text produces an object with no offsets.
  */
  - (FMTextOffsets *)offsetsForColumnIndex:(int)columnIdx
  {
      // The offsets() value is a space separated groups of 4 integers
      const char *rawOffsets = (const char *)sqlite3_column_text([_statement statement], columnIdx);

      // sqlite3_column_text() returns NULL for SQL NULL values and when it runs
      // out of memory; never pass that on to the initializer.
      if (rawOffsets == NULL) {
          rawOffsets = "";
      }

      return [[FMTextOffsets alloc] initWithDBOffsets:rawOffsets];
  }

  @end