FMDatabaseFTS3Tests.m 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157
  1. //
  2. // FMDatabaseFTS3Tests.m
  3. // fmdb
  4. //
  5. // Created by Seaview Software on 8/26/14.
  6. //
  7. //
  8. #import "FMDBTempDBTests.h"
  9. #import "FMDatabase+FTS3.h"
  10. #import "FMTokenizers.h"
  /**
   Test case for the FTS3 category on FMDatabase: offsets() parsing and custom
   tokenizer registration. Derives from FMDBTempDBTests.
   */
  @interface FMDatabaseFTS3Tests : FMDBTempDBTests

  @end
  /**
   A tokenizer used only to exercise the tokenization system in these tests.

   It wraps another tokenizer and removes a trailing 's' from each token that
   the wrapped tokenizer produces.
   */
  @interface FMDepluralizerTokenizer : NSObject <FMTokenizerDelegate>

  /**
   Convenience constructor: allocates an instance and initializes it with
   -initWithBaseTokenizer:.
   */
  + (instancetype)tokenizerWithBaseTokenizer:(id<FMTokenizerDelegate>)tokenizer;

  /**
   Initializes a depluralizer that forwards all cursor handling to `tokenizer`.
   The implementation asserts that `tokenizer` is not nil.
   */
  - (instancetype)initWithBaseTokenizer:(id<FMTokenizerDelegate>)tokenizer;

  /** The wrapped tokenizer that does the actual tokenization. */
  @property (nonatomic, strong) id<FMTokenizerDelegate> m_baseTokenizer;

  @end
  // File-scope references to the tokenizers registered in +populateDatabase:.
  // Holding them here keeps the instances from being deallocated when that
  // method returns. Static storage starts out as nil.
  static id<FMTokenizerDelegate> g_depluralizeTok;
  static id<FMTokenizerDelegate> g_simpleTok;
  @implementation FMDatabaseFTS3Tests

  /**
   Fills `db` with the fixture used by the tests and registers the tokenizers.

   Creates the FTS3 table `mail(subject, body)` with two rows, then registers
   an FMSimpleTokenizer under the key "testTok" and an FMDepluralizerTokenizer
   (wrapping the simple tokenizer) under the key "depluralize".
   */
  + (void)populateDatabase:(FMDatabase *)db
  {
      [db executeUpdate:@"CREATE VIRTUAL TABLE mail USING fts3(subject, body)"];

      [db executeUpdate:@"INSERT INTO mail VALUES('hello world', 'This message is a hello world message.')"];
      [db executeUpdate:@"INSERT INTO mail VALUES('urgent: serious', 'This mail is seen as a more serious mail')"];

      // Create tokenizer instances that will not be de-allocated when the method finishes:
      // they are stored in file-scope statics.
      g_simpleTok = [[FMSimpleTokenizer alloc] initWithLocale:NULL];
      [FMDatabase registerTokenizer:g_simpleTok withKey:@"testTok"];

      g_depluralizeTok = [FMDepluralizerTokenizer tokenizerWithBaseTokenizer:g_simpleTok];
      [FMDatabase registerTokenizer:g_depluralizeTok withKey:@"depluralize"];
  }

  /** Runs before each test method; no setup beyond the superclass's is needed. */
  - (void)setUp
  {
      [super setUp];
  }

  /** Runs after each test method; no teardown beyond the superclass's is needed. */
  - (void)tearDown
  {
      [super tearDown];
  }

  /**
   Checks the offsets() output for a query matching 'world'.

   Only the first fixture row contains 'world': once in `subject` (column 0,
   byte offset 6) and once in `body` (column 1, byte offset 24), each 5 bytes long.
   */
  - (void)testOffsets
  {
      FMResultSet *results = [self.db executeQuery:@"SELECT offsets(mail) FROM mail WHERE mail MATCH 'world'"];
      XCTAssertNotNil(results, @"Query failed: %@", [self.db lastErrorMessage]);

      // A missing row must fail the test instead of silently skipping every assertion.
      if (![results next]) {
          XCTFail(@"Expected a row matching 'world'");
          [results close];
          return;
      }

      FMTextOffsets *offsets = [results offsetsForColumnIndex:0];
      XCTAssertNotNil(offsets, @"Expected offsets for the matching row");

      // Count block invocations so an enumeration that never calls the block is detected.
      __block NSUInteger matchCount = 0;

      [offsets enumerateWithBlock:^(NSInteger columnNumber, NSInteger termNumber, NSRange matchRange) {
          matchCount++;

          if (columnNumber == 0) {
              XCTAssertEqual(termNumber, 0L);
              XCTAssertEqual(matchRange.location, 6UL);
              XCTAssertEqual(matchRange.length, 5UL);
          } else if (columnNumber == 1) {
              XCTAssertEqual(termNumber, 0L);
              XCTAssertEqual(matchRange.location, 24UL);
              XCTAssertEqual(matchRange.length, 5UL);
          }
      }];

      XCTAssertEqual(matchCount, (NSUInteger)2, @"Expected one match in each of the two columns");

      [results close];
  }

  /**
   Exercises the custom tokenizers registered in +populateDatabase:.

   The "testTok" table must find a non-ASCII word verbatim. The "depluralize"
   table must match 'bands' against text containing 'band', while the "testTok"
   table must not.
   */
  - (void)testTokenizer
  {
      [self.db installTokenizerModule];

      BOOL ok = [self.db executeUpdate:@"CREATE VIRTUAL TABLE simple USING fts3(tokenize=fmdb testTok)"];
      XCTAssertTrue(ok, @"Failed to create virtual table: %@", [self.db lastErrorMessage]);

      // The FMSimpleTokenizer handles non-ASCII characters well, since it's based on CFStringTokenizer.
      NSString *text = @"I like the band Queensrÿche. They are really great musicians.";

      ok = [self.db executeUpdate:@"INSERT INTO simple VALUES(?)", text];
      XCTAssertTrue(ok, @"Failed to insert data: %@", [self.db lastErrorMessage]);

      FMResultSet *results = [self.db executeQuery:@"SELECT * FROM simple WHERE simple MATCH ?", @"Queensrÿche"];
      XCTAssertNotNil(results, @"Query failed: %@", [self.db lastErrorMessage]);
      XCTAssertTrue([results next], @"Failed to find result");
      // Close each result set before the variable is reused so no statement is left open.
      [results close];

      ok = [self.db executeUpdate:@"CREATE VIRTUAL TABLE depluralize_t USING fts3(tokenize=fmdb depluralize)"];
      XCTAssertTrue(ok, @"Failed to create virtual table with depluralize tokenizer: %@", [self.db lastErrorMessage]);

      ok = [self.db executeUpdate:@"INSERT INTO depluralize_t VALUES(?)", text];
      XCTAssertTrue(ok, @"Failed to insert data: %@", [self.db lastErrorMessage]);

      // If depluralization is working, searching for 'bands' should still provide a match as 'band' is in the text.
      results = [self.db executeQuery:@"SELECT * FROM depluralize_t WHERE depluralize_t MATCH ?", @"bands"];
      XCTAssertTrue([results next], @"Failed to find result");
      [results close];

      // Demonstrate that depluralization mattered; we should NOT find any results when searching the
      // simple table as it does not use that tokenizer.
      results = [self.db executeQuery:@"SELECT * FROM simple WHERE simple MATCH ?", @"bands"];
      XCTAssertFalse([results next], @"Found a result where none should be found");
      [results close];
  }

  @end
  86. #pragma mark -
  @implementation FMDepluralizerTokenizer

  /** Returns a new depluralizer wrapping `tokenizer`. */
  + (instancetype)tokenizerWithBaseTokenizer:(id<FMTokenizerDelegate>)tokenizer
  {
      return [[self alloc] initWithBaseTokenizer:tokenizer];
  }

  /**
   Initializes the depluralizer with the tokenizer it delegates to.

   @param tokenizer The wrapped tokenizer; asserted to be non-nil.
   */
  - (instancetype)initWithBaseTokenizer:(id<FMTokenizerDelegate>)tokenizer
  {
      NSParameterAssert(tokenizer);

      if ((self = [super init])) {
          // Assign the synthesized ivar directly; accessors are avoided inside init.
          _m_baseTokenizer = tokenizer;
      }
      return self;
  }

  /** Forwards cursor setup to the wrapped tokenizer. */
  - (void)openTokenizerCursor:(FMTokenizerCursor *)cursor
  {
      [self.m_baseTokenizer openTokenizerCursor:cursor];
  }

  /**
   Asks the wrapped tokenizer for the next token and, when one was produced,
   strips a single trailing 's' from it.

   @return The wrapped tokenizer's return value, unchanged. It is treated here
           as "no more tokens" when YES.
   */
  - (BOOL)nextTokenForCursor:(FMTokenizerCursor *)cursor
  {
      BOOL done = [self.m_baseTokenizer nextTokenForCursor:cursor];

      if (!done) {
          // NOTE(review): assumes the wrapped tokenizer stores a mutable string in
          // cursor->tokenString, so it can be edited in place -- confirm against FMTokenizers.
          NSMutableString *tokenString = (__bridge NSMutableString *)(cursor->tokenString);

          // Require more than one character so a token that is just "s" is not
          // reduced to an empty token.
          if (tokenString.length > 1 && [tokenString hasSuffix:@"s"]) {
              [tokenString deleteCharactersInRange:NSMakeRange(tokenString.length - 1, 1)];
          }
      }

      return done;
  }

  /** Forwards cursor teardown to the wrapped tokenizer. */
  - (void)closeTokenizerCursor:(FMTokenizerCursor *)cursor
  {
      [self.m_baseTokenizer closeTokenizerCursor:cursor];
  }

  @end