Эх сурвалжийг харах

Added tests for a simple tokenizer which shortens some strings (by removing plurals). This test fails prior to [2c34277] and now passes.

Evan D. Schoenberg, M.D 10 жил өмнө
parent
commit
f4179471ed

+ 78 - 4
Tests/FMDatabaseFTS3Tests.m

@@ -14,7 +14,17 @@ @interface FMDatabaseFTS3Tests : FMDBTempDBTests
 
 @end
 
-static id<FMTokenizerDelegate> g_testTok = nil;
+/**
+ A test-only tokenizer that wraps another tokenizer and removes a single trailing 's' from each
+ token the wrapped tokenizer produces (a naive form of depluralization).
+ Opening, advancing and closing a cursor are all forwarded to the wrapped tokenizer.
+ Used for testing of the tokenization system.
+
+ Create instances with +tokenizerWithBaseTokenizer:, passing the tokenizer to wrap; the
+ implementation asserts that it is non-nil. m_baseTokenizer holds a strong reference to it.
+ */
+@interface FMDepluralizerTokenizer : NSObject <FMTokenizerDelegate>
++ (instancetype)tokenizerWithBaseTokenizer:(id<FMTokenizerDelegate>)tokenizer;
+@property (nonatomic, strong) id<FMTokenizerDelegate> m_baseTokenizer;
+@end
+
+static id<FMTokenizerDelegate> g_simpleTok = nil;      // Assigned in +populateDatabase:; registered under the key "testTok".
+static id<FMTokenizerDelegate> g_depluralizeTok = nil; // Assigned in +populateDatabase:; wraps g_simpleTok, registered under the key "depluralize".
 
 @implementation FMDatabaseFTS3Tests
 
@@ -26,8 +36,11 @@ + (void)populateDatabase:(FMDatabase *)db
     [db executeUpdate:@"INSERT INTO mail VALUES('urgent: serious', 'This mail is seen as a more serious mail')"];
 
     // Create a tokenizer instance that will not be de-allocated when the method finishes.
-    g_testTok = [[FMSimpleTokenizer alloc] initWithLocale:NULL];
-    [FMDatabase registerTokenizer:g_testTok withKey:@"testTok"];
+    g_simpleTok = [[FMSimpleTokenizer alloc] initWithLocale:NULL];
+    [FMDatabase registerTokenizer:g_simpleTok withKey:@"testTok"];
+    
+    g_depluralizeTok = [FMDepluralizerTokenizer tokenizerWithBaseTokenizer:g_simpleTok];
+    [FMDatabase registerTokenizer:g_depluralizeTok withKey:@"depluralize"];
 }
 
 - (void)setUp
@@ -71,13 +84,74 @@ - (void)testTokenizer
     XCTAssertTrue(ok, @"Failed to create virtual table: %@", [self.db lastErrorMessage]);
 
     // The FMSimpleTokenizer handles non-ASCII characters well, since it's based on CFStringTokenizer.
-    NSString *text = @"I like the band Queensrÿche. They are really great.";
+    NSString *text = @"I like the band Queensrÿche. They are really great musicians.";
     
     ok = [self.db executeUpdate:@"INSERT INTO simple VALUES(?)", text];
     XCTAssertTrue(ok, @"Failed to insert data: %@", [self.db lastErrorMessage]);
     
     FMResultSet *results = [self.db executeQuery:@"SELECT * FROM simple WHERE simple MATCH ?", @"Queensrÿche"];
     XCTAssertTrue([results next], @"Failed to find result");
+    
+    ok = [self.db executeUpdate:@"CREATE VIRTUAL TABLE depluralize_t USING fts3(tokenize=fmdb depluralize)"];
+    XCTAssertTrue(ok, @"Failed to create virtual table with depluralize tokenizer: %@", [self.db lastErrorMessage]);
+
+    ok = [self.db executeUpdate:@"INSERT INTO depluralize_t VALUES(?)", text];
+    XCTAssertTrue(ok, @"Failed to insert data: %@", [self.db lastErrorMessage]);
+
+    //If depluralization is working, searching for 'bands' should still provide a match as 'band' is in the text
+    results = [self.db executeQuery:@"SELECT * FROM depluralize_t WHERE depluralize_t MATCH ?", @"bands"];
+    XCTAssertTrue([results next], @"Failed to find result");
+    
+    //Demonstrate that depluralization mattered; we should NOT find any results when searching the simple table as it does not use that tokenizer
+    results = [self.db executeQuery:@"SELECT * FROM simple WHERE simple MATCH ?", @"bands"];
+    XCTAssertFalse([results next], @"Found a result where none should be found");
+}
+
+@end
+
+
+
+#pragma mark -
+
+@implementation FMDepluralizerTokenizer
+
+/**
+ Convenience factory: returns a new depluralizer that wraps the given tokenizer.
+ @param tokenizer The tokenizer whose tokens will be post-processed. Must not be nil.
+ @return A new instance forwarding to tokenizer.
+ */
++ (instancetype)tokenizerWithBaseTokenizer:(id<FMTokenizerDelegate>)tokenizer
+{
+    return [[self alloc] initWithBaseTokenizer:tokenizer];
 }
 
+/**
+ Initializes the receiver with the tokenizer it forwards to.
+ @param tokenizer The tokenizer to wrap; asserted to be non-nil.
+ @return The initialized instance.
+ */
+- (instancetype)initWithBaseTokenizer:(id<FMTokenizerDelegate>)tokenizer
+{
+    NSParameterAssert(tokenizer);
+    
+    if ((self = [super init])) {
+        // Assign the synthesized ivar directly rather than calling the setter during init.
+        _m_baseTokenizer = tokenizer;
+    }
+    return self;
+}
+
+/**
+ Forwards cursor set-up to the wrapped tokenizer unchanged.
+ */
+- (void)openTokenizerCursor:(FMTokenizerCursor *)cursor
+{
+    [self.m_baseTokenizer openTokenizerCursor:cursor];
+}
+
+/**
+ Advances the wrapped tokenizer and, when it reports a token, strips one trailing 's' from it.
+ @param cursor The cursor being advanced; its tokenString is edited in place.
+ @return The wrapped tokenizer's result, unchanged. A NO result is treated here as "a token was
+         produced"; presumably YES means tokenization is finished - confirm against the
+         FMTokenizerDelegate contract.
+ */
+- (BOOL)nextTokenForCursor:(FMTokenizerCursor *)cursor
+{
+    BOOL done = [self.m_baseTokenizer nextTokenForCursor:cursor];
+
+    if (!done) {
+        // NOTE(review): assumes the wrapped tokenizer leaves a mutable string in
+        // cursor->tokenString - confirm.
+        NSMutableString *tokenString = (__bridge NSMutableString *)(cursor->tokenString);
+
+        // Require at least two characters so that a token consisting solely of "s" is left
+        // intact instead of being reduced to an empty (zero-length) token.
+        if (tokenString.length > 1 && [tokenString hasSuffix:@"s"]) {
+            [tokenString deleteCharactersInRange:NSMakeRange(tokenString.length - 1, 1)];
+        }
+    }
+
+    return done;
+}
+
+/**
+ Forwards cursor tear-down to the wrapped tokenizer unchanged.
+ */
+- (void)closeTokenizerCursor:(FMTokenizerCursor *)cursor
+{
+    [self.m_baseTokenizer closeTokenizerCursor:cursor];
+}
+
+
 @end