FMDatabase+FTS3.m 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278
  1. //
  2. // FMDatabase+FTS3.m
  3. // fmdb
  4. //
  5. // Created by Andrew on 3/27/14.
  6. // Copyright (c) 2014 Andrew Goodale. All rights reserved.
  7. //
  8. #import "FMDatabase+FTS3.h"
  9. #import "fts3_tokenizer.h"
  /*
  ** Command strings for FTS tables (presumably the values passed to
  ** -issueCommand:forTable: -- confirm against the header). The merge and
  ** automerge entries are printf-style templates: the caller must substitute the
  ** %u placeholders before use.
  */
  NSString *const kFTSCommandOptimize = @"optimize";
  NSString *const kFTSCommandRebuild = @"rebuild";
  NSString *const kFTSCommandIntegrityCheck = @"integrity-check";
  NSString *const kFTSCommandMerge = @"merge=%u,%u";
  NSString *const kFTSCommandAutoMerge = @"automerge=%u";

  /*
  ** I know this is an evil global, but we need to be able to map names to implementations.
  ** The table is created lazily by +registerTokenizer:withKey: and read by
  ** FMDBTokenizerCreate(); it stays nil until the first registration.
  */
  static NSMapTable *g_delegateMap = nil;

  /*
  ** Key used when a delegate is registered, or a tokenizer is created, without an
  ** explicit name. Declared `* const` so the pointer cannot be reassigned by accident.
  */
  static NSString * const kDefaultTokenizerDelegateKey = @"DefaultTokenizerDelegateKey";
  /*
  ** Tokenizer instance handed to SQLite. It "derives" from sqlite3_tokenizer by
  ** embedding it as the FIRST member, which is what allows the callbacks in this
  ** file to cast between sqlite3_tokenizer* and FMDBTokenizer*.
  */
  struct FMDBTokenizer
  {
      /* Must stay first: SQLite only ever sees a pointer to this member. */
      sqlite3_tokenizer base;

      /* Delegate that performs the tokenization. Not retained by this struct. */
      __unsafe_unretained id<FMTokenizerDelegate> delegate;
  };
  typedef struct FMDBTokenizer FMDBTokenizer;
  /*
  ** Create a new tokenizer instance.
  **
  ** argv[0], when present, is the key of a delegate previously registered with
  ** +registerTokenizer:withKey:. With no arguments the default key is used.
  **
  ** Returns SQLITE_OK and stores the new tokenizer in *ppTokenizer, SQLITE_ERROR
  ** when no delegate can be found for the key (never registered, or already
  ** deallocated, since the map holds its values weakly), or SQLITE_NOMEM when
  ** the allocation fails.
  */
  static int FMDBTokenizerCreate(int argc, const char * const *argv, sqlite3_tokenizer **ppTokenizer)
  {
      NSString *key = kDefaultTokenizerDelegateKey;

      // +stringWithUTF8String: raises on NULL and yields nil for malformed UTF-8.
      if (argc > 0 && argv[0] != NULL) {
          key = [NSString stringWithUTF8String:argv[0]];
      }

      // Resolve the delegate BEFORE allocating, so the failure path has nothing to
      // free. Previously the tokenizer allocation leaked when the lookup failed.
      id<FMTokenizerDelegate> delegate = (key != nil) ? [g_delegateMap objectForKey:key] : nil;
      if (delegate == nil) {
          return SQLITE_ERROR;
      }

      FMDBTokenizer *tokenizer = (FMDBTokenizer *) sqlite3_malloc(sizeof(FMDBTokenizer));
      if (tokenizer == NULL) {
          return SQLITE_NOMEM;
      }
      memset(tokenizer, 0, sizeof(*tokenizer));

      tokenizer->delegate = delegate;
      *ppTokenizer = &tokenizer->base;
      return SQLITE_OK;
  }
  /*
  ** Destroy a tokenizer created by FMDBTokenizerCreate().
  **
  ** Only the struct's own allocation is released; the delegate is not owned by
  ** the tokenizer and is left untouched. Always returns SQLITE_OK.
  */
  static int FMDBTokenizerDestroy(sqlite3_tokenizer *pTokenizer)
  {
      FMDBTokenizer *doomed = (FMDBTokenizer *)pTokenizer;

      sqlite3_free(doomed);
      return SQLITE_OK;
  }
  /*
  ** Prepare to begin tokenizing a particular string. The input
  ** string to be tokenized is pInput[0..nBytes-1]; a negative nBytes
  ** means pInput is NUL-terminated. A cursor used to incrementally
  ** tokenize this string is returned in *ppCursor.
  **
  ** Returns SQLITE_OK on success, SQLITE_NOMEM if the cursor cannot be
  ** allocated, or SQLITE_ERROR if the input cannot be represented as a
  ** CFString (e.g. it is not valid UTF-8).
  */
  static int FMDBTokenizerOpen(sqlite3_tokenizer *pTokenizer,         /* The tokenizer */
                               const char *pInput, int nBytes,        /* String to be tokenized */
                               sqlite3_tokenizer_cursor **ppCursor)   /* OUT: Tokenization cursor */
  {
      FMDBTokenizer *tokenizer = (FMDBTokenizer *)pTokenizer;

      FMTokenizerCursor *cursor = (FMTokenizerCursor *)sqlite3_malloc(sizeof(FMTokenizerCursor));
      if (cursor == NULL) {
          return SQLITE_NOMEM;
      }

      // Zero everything up front so that no field -- including `tokenizer`, which
      // this function never assigns -- is left holding uninitialised memory. This
      // also gives currentRange = {0, 0}, tokenIndex = 0, tokenString = NULL,
      // userObject = NULL and an empty outputBuf.
      memset(cursor, 0, sizeof(*cursor));

      // nBytes == 0 is checked before dereferencing so a zero-length buffer is never read.
      if (pInput == NULL || nBytes == 0 || pInput[0] == '\0') {
          cursor->inputString = CFRetain(CFSTR(""));
      } else {
          nBytes = (nBytes < 0) ? (int) strlen(pInput) : nBytes;

          // Wraps the caller's buffer without copying or taking ownership of it.
          // NOTE(review): this relies on pInput staying valid until the cursor is
          // closed -- confirm against the SQLite tokenizer contract.
          cursor->inputString = CFStringCreateWithBytesNoCopy(NULL, (const UInt8 *)pInput, nBytes,
                                                              kCFStringEncodingUTF8, false, kCFAllocatorNull);
      }

      // String creation returns NULL when the bytes cannot be decoded. Bail out
      // here rather than handing a NULL string to the delegate and to CFRelease().
      if (cursor->inputString == NULL) {
          sqlite3_free(cursor);
          return SQLITE_ERROR;
      }

      [tokenizer->delegate openTokenizerCursor:cursor];

      *ppCursor = (sqlite3_tokenizer_cursor *)cursor;
      return SQLITE_OK;
  }
  /*
  ** Close a tokenization cursor previously opened by a call to
  ** FMDBTokenizerOpen() above.
  **
  ** The delegate is notified first, while the cursor is still fully intact;
  ** afterwards the Core Foundation objects held by the cursor are released and
  ** the cursor itself is freed. Always returns SQLITE_OK.
  */
  static int FMDBTokenizerClose(sqlite3_tokenizer_cursor *pCursor)
  {
      FMTokenizerCursor *closing = (FMTokenizerCursor *)pCursor;
      FMDBTokenizer *owner = (FMDBTokenizer *)closing->tokenizer;

      [owner->delegate closeTokenizerCursor:closing];

      // userObject and tokenString are optional; inputString is always released.
      if (closing->userObject != NULL) {
          CFRelease(closing->userObject);
      }
      if (closing->tokenString != NULL) {
          CFRelease(closing->tokenString);
      }
      CFRelease(closing->inputString);

      sqlite3_free(closing);
      return SQLITE_OK;
  }
  /*
  ** Extract the next token from a tokenization cursor. The cursor must
  ** have been opened by a prior call to FMDBTokenizerOpen().
  **
  ** The delegate advances the cursor; a YES result from -nextTokenForCursor:
  ** means there are no more tokens. Otherwise the delegate is expected to have
  ** set `currentRange` (the token's UTF-16 range within `inputString`) and
  ** `tokenString` (the text to index, which may be a normalised form of the source).
  **
  ** Returns SQLITE_OK with all OUT parameters filled in, SQLITE_DONE at the end
  ** of input, or SQLITE_ERROR if the delegate left the cursor without a token
  ** string or with a range outside the input.
  */
  static int FMDBTokenizerNext(sqlite3_tokenizer_cursor *pCursor,  /* Cursor returned by Open */
                               const char **pzToken,               /* OUT: *pzToken is the token text */
                               int *pnBytes,                       /* OUT: Number of bytes in token */
                               int *piStartOffset,                 /* OUT: Starting offset of token */
                               int *piEndOffset,                   /* OUT: Ending offset of token */
                               int *piPosition)                    /* OUT: Position integer of token */
  {
      FMTokenizerCursor *cursor = (FMTokenizerCursor *)pCursor;
      FMDBTokenizer *tokenizer = (FMDBTokenizer *)cursor->tokenizer;

      if ([tokenizer->delegate nextTokenForCursor:cursor]) {
          return SQLITE_DONE;
      }

      // Validate what the delegate produced before handing it to Core Foundation,
      // which does not tolerate NULL strings or out-of-range requests.
      CFRange sourceRange = cursor->currentRange;
      CFIndex inputLength = CFStringGetLength(cursor->inputString);
      if (cursor->tokenString == NULL ||
          sourceRange.location < 0 || sourceRange.length < 0 ||
          sourceRange.location > inputLength ||
          sourceRange.length > inputLength - sourceRange.location) {
          return SQLITE_ERROR;
      }

      // The range from the tokenizer is in UTF-16 positions; SQLite needs UTF-8
      // byte offsets into the original input.
      CFIndex prefixBytes = 0;   // UTF-8 bytes before the start of the token
      CFIndex sourceBytes = 0;   // UTF-8 bytes the token occupies in the input
      CFIndex tokenBytes = 0;    // UTF-8 bytes written to outputBuf

      CFStringGetBytes(cursor->inputString, CFRangeMake(0, sourceRange.location),
                       kCFStringEncodingUTF8, '?', false, NULL, 0, &prefixBytes);
      CFStringGetBytes(cursor->inputString, sourceRange,
                       kCFStringEncodingUTF8, '?', false, NULL, 0, &sourceBytes);

      // Convert the token using ITS OWN length. The source range length is only
      // valid for inputString and may differ once the delegate has normalised the
      // token. Tokens longer than outputBuf are truncated to what fits.
      CFRange tokenRange = CFRangeMake(0, CFStringGetLength(cursor->tokenString));
      CFStringGetBytes(cursor->tokenString, tokenRange, kCFStringEncodingUTF8, '?', false,
                       cursor->outputBuf, sizeof(cursor->outputBuf), &tokenBytes);

      *pzToken = (char *) cursor->outputBuf;
      *pnBytes = (int) tokenBytes;
      *piStartOffset = (int) prefixBytes;
      // The end offset describes the span in the INPUT, not the emitted token text.
      *piEndOffset = (int) (prefixBytes + sourceBytes);
      *piPosition = cursor->tokenIndex++;

      return SQLITE_OK;
  }
  /*
  ** The tokenizer module registered with SQLite: a table of the callbacks above,
  ** each of which forwards to the tokenizer delegate.
  */
  static const sqlite3_tokenizer_module FMDBTokenizerModule =
  {
      .iVersion = 0,
      .xCreate  = FMDBTokenizerCreate,
      .xDestroy = FMDBTokenizerDestroy,
      .xOpen    = FMDBTokenizerOpen,
      .xClose   = FMDBTokenizerClose,
      .xNext    = FMDBTokenizerNext
  };
  149. #pragma mark
  @implementation FMDatabase (FTS3)

  /// Registers `tokenizer` under `key` so that FMDBTokenizerCreate() can find it.
  /// Both arguments are asserted to be non-empty. The delegate is held weakly by
  /// the registry, so the caller must keep it alive.
  + (void)registerTokenizer:(id<FMTokenizerDelegate>)tokenizer withKey:(NSString *)key
  {
      NSParameterAssert(tokenizer);
      NSParameterAssert([key length]);

      // The shared registry is created exactly once, on first registration.
      static dispatch_once_t registryOnce;
      dispatch_once(&registryOnce, ^{
          g_delegateMap = [NSMapTable mapTableWithKeyOptions:NSPointerFunctionsCopyIn
                                                valueOptions:NSPointerFunctionsWeakMemory];
      });

      [g_delegateMap setObject:tokenizer forKey:key];
  }

  /// Registers `tokenizer` under the default key.
  + (void)registerTokenizer:(id<FMTokenizerDelegate>)tokenizer
  {
      [self registerTokenizer:tokenizer withKey:kDefaultTokenizerDelegateKey];
  }

  /// Installs the FMDB tokenizer module into this database under `name` by
  /// running `SELECT fts3_tokenizer(?, ?)`.
  /// @return YES if the query produced a row, NO otherwise.
  - (BOOL)installTokenizerModuleWithName:(NSString *)name
  {
      // The blob bound to the query holds the ADDRESS of the module struct,
      // i.e. the bytes of the pointer itself, not a copy of the struct.
      const sqlite3_tokenizer_module *modulePointer = &FMDBTokenizerModule;
      NSData *pointerBlob = [NSData dataWithBytes:&modulePointer length:sizeof(modulePointer)];

      FMResultSet *registration = [self executeQuery:@"SELECT fts3_tokenizer(?, ?)", name, pointerBlob];
      if (![registration next]) {
          return NO;
      }

      [registration close];
      return YES;
  }

  /// Installs the tokenizer module under the name "fmdb".
  - (BOOL)installTokenizerModule
  {
      return [self installTokenizerModuleWithName:@"fmdb"];
  }

  /// Sends `command` to the table `tableName` by inserting it into the column
  /// that shares the table's name.
  /// @return The result of -executeUpdate: for the generated statement.
  - (BOOL)issueCommand:(NSString *)command forTable:(NSString *)tableName
  {
      // %1$@ is used twice on purpose: the table name and the column name are the same.
      NSString *statement = [NSString stringWithFormat:@"INSERT INTO %1$@(%1$@) VALUES (?)", tableName];

      return [self executeUpdate:statement, command];
  }

  @end
  187. #pragma mark
  @implementation FMTextOffsets
  {
      // Raw offsets text: whitespace-separated groups of four unsigned integers.
      // Never nil after initialisation (empty when no offsets were supplied).
      NSString *_rawOffsets;
  }

  /// Creates an offsets object from the raw C string returned by the database.
  /// @param rawOffsets UTF-8 text of space-separated integers. NULL or malformed
  ///        UTF-8 is treated as "no offsets" rather than raising.
  - (instancetype)initWithDBOffsets:(const char *)rawOffsets
  {
      if ((self = [super init])) {
          // +stringWithUTF8String: raises on NULL and returns nil for invalid
          // UTF-8; both cases collapse to an empty string here.
          NSString *decoded = (rawOffsets != NULL) ? [NSString stringWithUTF8String:rawOffsets] : nil;
          _rawOffsets = (decoded != nil) ? decoded : @"";
      }
      return self;
  }

  /// Calls `block` once per complete group of four integers, in order.
  /// The first two integers are passed through as-is; the last two are passed
  /// as the location and length of an NSRange. Parsing stops at the first
  /// incomplete or non-numeric group. A nil block is a no-op.
  - (void)enumerateWithBlock:(void (^)(NSInteger, NSInteger, NSRange))block
  {
      const char *remaining = [_rawOffsets UTF8String];
      if (block == nil || remaining == NULL) {
          return;
      }

      uint32_t fields[4];
      int charsRead = 0;

      // %n reports how many characters this group consumed (it does not count
      // toward sscanf's return value), which is how the scan position advances.
      while (sscanf(remaining, "%u %u %u %u%n",
                    &fields[0], &fields[1], &fields[2], &fields[3], &charsRead) == 4) {
          block(fields[0], fields[1], NSMakeRange(fields[2], fields[3]));
          remaining += charsRead;
      }
  }

  @end
  @implementation FMResultSet (FTS3)

  /// Returns the offsets stored in the given column of the current row.
  /// @param columnIdx Zero-based column index holding the offsets text.
  /// @return An FMTextOffsets wrapping the column text. An empty offsets object
  ///         is returned when the column has no text.
  - (FMTextOffsets *)offsetsForColumnIndex:(int)columnIdx
  {
      // The offsets() value is a space-separated list of groups of 4 integers.
      const char *rawOffsets = (const char *)sqlite3_column_text([_statement statement], columnIdx);

      // sqlite3_column_text() returns NULL for SQL NULL (and on allocation
      // failure). Substitute an empty string so the initialiser never sees NULL.
      if (rawOffsets == NULL) {
          rawOffsets = "";
      }

      return [[FMTextOffsets alloc] initWithDBOffsets:rawOffsets];
  }

  @end