diff --git a/src/SkillServer/migrations/002_fts5_porter_stemmer.sql b/src/SkillServer/migrations/002_fts5_porter_stemmer.sql
new file mode 100644
index 0000000..c3e09ef
--- /dev/null
+++ b/src/SkillServer/migrations/002_fts5_porter_stemmer.sql
@@ -0,0 +1,63 @@
+-- 002_fts5_porter_stemmer.sql
+-- Rebuilds the skills_fts index with the Porter stemming tokenizer so that
+-- searches can match on word stems.
+
+-- The sync triggers go first, then the FTS table they write to.
+DROP TRIGGER IF EXISTS trg_skills_fts_delete;
+DROP TRIGGER IF EXISTS trg_skills_fts_insert;
+DROP TABLE IF EXISTS skills_fts;
+
+-- FTS5 table tokenized with the porter stemmer wrapped around unicode61.
+CREATE VIRTUAL TABLE skills_fts USING fts5(
+    name,
+    description,
+    category,
+    tokenize='porter unicode61'
+);
+
+-- Backfill the index from every version currently flagged is_latest = 1.
+INSERT INTO skills_fts (rowid, name, description, category)
+SELECT
+    sk.id,
+    sk.name,
+    latest.description,
+    IFNULL(latest.category, '')
+FROM skills AS sk
+JOIN skill_versions AS latest ON latest.skill_id = sk.id
+WHERE latest.is_latest = 1;
+
+-- Inserting a version with is_latest = 1 replaces the skill's FTS row.
+CREATE TRIGGER trg_skills_fts_insert
+    AFTER INSERT ON skill_versions
+    WHEN NEW.is_latest = 1
+BEGIN
+    DELETE FROM skills_fts
+    WHERE rowid = NEW.skill_id;
+    INSERT INTO skills_fts (rowid, name, description, category)
+    SELECT
+        NEW.skill_id,
+        sk.name,
+        NEW.description,
+        IFNULL(NEW.category, '')
+    FROM skills AS sk
+    WHERE sk.id = NEW.skill_id;
+END;
+
+-- Deleting any version removes the skill's FTS row, then re-adds it from
+-- whichever remaining version is flagged is_latest = 1 (none: row stays gone).
+CREATE TRIGGER trg_skills_fts_delete
+    AFTER DELETE ON skill_versions
+BEGIN
+    DELETE FROM skills_fts
+    WHERE rowid = OLD.skill_id;
+    INSERT INTO skills_fts (rowid, name, description, category)
+    SELECT
+        sk.id,
+        sk.name,
+        latest.description,
+        IFNULL(latest.category, '')
+    FROM skills AS sk
+    JOIN skill_versions AS latest ON latest.skill_id = sk.id
+    WHERE sk.id = OLD.skill_id
+      AND latest.is_latest = 1;
+END;
diff --git a/tests/SkillServer.Integration.Tests/SkillServerIntegrationTests.cs b/tests/SkillServer.Integration.Tests/SkillServerIntegrationTests.cs
index 815504e..6524e2b 100644
--- a/tests/SkillServer.Integration.Tests/SkillServerIntegrationTests.cs
+++ 
b/tests/SkillServer.Integration.Tests/SkillServerIntegrationTests.cs
@@ -571,4 +571,48 @@ public async Task UploadSkill_DifferentVersions_AreAllowed()
         var versions = await _fixture.Client.GetSkillVersionsAsync(skillName, ct);
         Assert.Equal(2, versions.Count);
     }
+
+    /// <summary>
+    /// Uploads a skill whose description contains "close deal" and verifies that
+    /// searches for the inflected forms "closed deals" and "closing" still return it.
+    /// </summary>
+    [Fact]
+    public async Task SearchSkills_WithPorterStemming_MatchesStemmedTerms()
+    {
+        var ct = TestContext.Current.CancellationToken;
+
+        // Random GUID-derived prefix; presumably avoids name collisions between test runs.
+        var prefix = $"stem-{Guid.NewGuid():N}"[..10];
+        var skillName = $"{prefix}-closer";
+
+        var skillContent = $"""
+            ---
+            name: {skillName}
+            description: Helps sales reps close deal opportunities faster
+            ---
+
+            # Stemming Test
+            """;
+
+        using var content = new MultipartFormDataContent();
+        content.Add(new StringContent(skillName), "name");
+        content.Add(new StringContent("1.0.0"), "version");
+
+        var fileContent = new ByteArrayContent(Encoding.UTF8.GetBytes(skillContent));
+        fileContent.Headers.ContentType = new MediaTypeHeaderValue("text/markdown");
+        content.Add(fileContent, "file", "SKILL.md");
+
+        // Fail fast if the upload is rejected; otherwise a failed upload would only
+        // surface later as a misleading "skill not found in search results" failure.
+        using var response = await _fixture.AuthenticatedHttpClient.PostAsync("/skills", content, ct);
+        response.EnsureSuccessStatusCode();
+
+        // "closed deals" should match "close deal" via porter stemming
+        var results = await _fixture.Client.SearchSkillsAsync("closed deals", ct: ct);
+        Assert.Contains(results, s => s.Name == skillName);
+
+        // "closing" should match "close" via stemming
+        results = await _fixture.Client.SearchSkillsAsync("closing", ct: ct);
+        Assert.Contains(results, s => s.Name == skillName);
+    }
 }