Skip to content

Commit c226523

Browse files
krisk and claude committed
fix(search): handle quoted tokens with inner spaces and quotes in extended search
Replace SPACE_RE regex splitting with a proper tokenizer that correctly handles multi-match quoted tokens like ="said "test" where inner spaces and quotes are part of the search pattern. Closes #810 Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 72c5c75 commit c226523

8 files changed

Lines changed: 243 additions & 21 deletions

File tree

dist/fuse.cjs

Lines changed: 48 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1317,18 +1317,62 @@ var IncludeMatch = /*#__PURE__*/function (_BaseMatch) {
13171317
// ❗Order is important. DO NOT CHANGE.
13181318
var searchers = [ExactMatch, IncludeMatch, PrefixExactMatch, InversePrefixExactMatch, InverseSuffixExactMatch, SuffixExactMatch, InverseExactMatch, FuzzyMatch];
13191319
var searchersLen = searchers.length;
1320-
1321-
// Regex to split by spaces, but keep anything in quotes together
1322-
var SPACE_RE = / +(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)/;
13231320
var OR_TOKEN = '|';
13241321

1322+
// Tokenize a query string into individual search terms.
1323+
// Respects multi-match quoted tokens like ="said "test"" or ^"hello world"$
1324+
// where inner spaces and quotes are part of the token.
1325+
// @param {string} pattern  One OR-branch of an extended-search query.
// @returns {string[]}      The tokens of `pattern`, in order of appearance.
//
// Only the space character (' ') separates tokens. A token that contains a
// double quote extends up to its closing quote: a '"' that is followed by
// end-of-string, a space, or '$' plus end-of-string/space. Quotes that are
// followed by anything else are treated as part of the token content.
//
// If no such closing quote exists (unbalanced quotes), the token simply ends
// at the next space, so an unterminated quote never swallows later tokens.
function tokenize(pattern) {
  var tokens = [];
  var len = pattern.length;
  var i = 0;
  while (i < len) {
    // Skip the spaces separating tokens
    while (i < len && pattern[i] === ' ') i++;
    if (i >= len) break;

    // Advance past any leading characters (e.g. =, !, ^, ') up to the first
    // space or quote, to find out whether this token has a quoted part
    var j = i;
    while (j < len && pattern[j] !== ' ' && pattern[j] !== '"') j++;

    // Exclusive end index of the token; -1 while still undetermined
    var end = -1;
    if (j < len && pattern[j] === '"') {
      // Quoted token: look for the quote that really closes it
      for (var k = j + 1; k < len; k++) {
        if (pattern[k] !== '"') continue;
        var next = k + 1;
        if (next >= len || pattern[next] === ' ') {
          end = next; // include closing quote
          break;
        }
        if (pattern[next] === '$' && (next + 1 >= len || pattern[next + 1] === ' ')) {
          end = next + 1; // include "$
          break;
        }
      }
    }
    if (end === -1) {
      // Unquoted token, or a quote that is never closed: read until the next
      // space (or end of input)
      end = j;
      while (end < len && pattern[end] !== ' ') end++;
    }
    tokens.push(pattern.substring(i, end));
    i = end;
  }
  return tokens;
}
1368+
13251369
// Return a 2D array representation of the query, for simpler parsing.
13261370
// Example:
13271371
// "^core go$ | rb$ | py$ xy$" => [["^core", "go$"], ["rb$"], ["py$", "xy$"]]
13281372
function parseQuery(pattern) {
13291373
var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};
13301374
return pattern.split(OR_TOKEN).map(function (item) {
1331-
var query = item.trim().split(SPACE_RE).filter(function (item) {
1375+
var query = tokenize(item.trim()).filter(function (item) {
13321376
return item && !!item.trim();
13331377
});
13341378
var results = [];

dist/fuse.js

Lines changed: 48 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1321,18 +1321,62 @@
13211321
// ❗Order is important. DO NOT CHANGE.
13221322
var searchers = [ExactMatch, IncludeMatch, PrefixExactMatch, InversePrefixExactMatch, InverseSuffixExactMatch, SuffixExactMatch, InverseExactMatch, FuzzyMatch];
13231323
var searchersLen = searchers.length;
1324-
1325-
// Regex to split by spaces, but keep anything in quotes together
1326-
var SPACE_RE = / +(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)/;
13271324
var OR_TOKEN = '|';
13281325

1326+
// Tokenize a query string into individual search terms.
1327+
// Respects multi-match quoted tokens like ="said "test"" or ^"hello world"$
1328+
// where inner spaces and quotes are part of the token.
1329+
// @param {string} pattern  One OR-branch of an extended-search query.
// @returns {string[]}      The tokens of `pattern`, in order of appearance.
//
// Only the space character (' ') separates tokens. A token that contains a
// double quote extends up to its closing quote: a '"' that is followed by
// end-of-string, a space, or '$' plus end-of-string/space. Quotes that are
// followed by anything else are treated as part of the token content.
//
// If no such closing quote exists (unbalanced quotes), the token simply ends
// at the next space, so an unterminated quote never swallows later tokens.
function tokenize(pattern) {
  var tokens = [];
  var len = pattern.length;
  var i = 0;
  while (i < len) {
    // Skip the spaces separating tokens
    while (i < len && pattern[i] === ' ') i++;
    if (i >= len) break;

    // Advance past any leading characters (e.g. =, !, ^, ') up to the first
    // space or quote, to find out whether this token has a quoted part
    var j = i;
    while (j < len && pattern[j] !== ' ' && pattern[j] !== '"') j++;

    // Exclusive end index of the token; -1 while still undetermined
    var end = -1;
    if (j < len && pattern[j] === '"') {
      // Quoted token: look for the quote that really closes it
      for (var k = j + 1; k < len; k++) {
        if (pattern[k] !== '"') continue;
        var next = k + 1;
        if (next >= len || pattern[next] === ' ') {
          end = next; // include closing quote
          break;
        }
        if (pattern[next] === '$' && (next + 1 >= len || pattern[next + 1] === ' ')) {
          end = next + 1; // include "$
          break;
        }
      }
    }
    if (end === -1) {
      // Unquoted token, or a quote that is never closed: read until the next
      // space (or end of input)
      end = j;
      while (end < len && pattern[end] !== ' ') end++;
    }
    tokens.push(pattern.substring(i, end));
    i = end;
  }
  return tokens;
}
1372+
13291373
// Return a 2D array representation of the query, for simpler parsing.
13301374
// Example:
13311375
// "^core go$ | rb$ | py$ xy$" => [["^core", "go$"], ["rb$"], ["py$", "xy$"]]
13321376
function parseQuery(pattern) {
13331377
var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};
13341378
return pattern.split(OR_TOKEN).map(function (item) {
1335-
var query = item.trim().split(SPACE_RE).filter(function (item) {
1379+
var query = tokenize(item.trim()).filter(function (item) {
13361380
return item && !!item.trim();
13371381
});
13381382
var results = [];

dist/fuse.min.cjs

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

dist/fuse.min.js

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

dist/fuse.min.mjs

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

dist/fuse.mjs

Lines changed: 48 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -993,17 +993,61 @@ class IncludeMatch extends BaseMatch {
993993
// ❗Order is important. DO NOT CHANGE.
994994
const searchers = [ExactMatch, IncludeMatch, PrefixExactMatch, InversePrefixExactMatch, InverseSuffixExactMatch, SuffixExactMatch, InverseExactMatch, FuzzyMatch];
995995
const searchersLen = searchers.length;
996-
997-
// Regex to split by spaces, but keep anything in quotes together
998-
const SPACE_RE = / +(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)/;
999996
const OR_TOKEN = '|';
1000997

998+
// Tokenize a query string into individual search terms.
999+
// Respects multi-match quoted tokens like ="said "test"" or ^"hello world"$
1000+
// where inner spaces and quotes are part of the token.
1001+
// @param {string} pattern  One OR-branch of an extended-search query.
// @returns {string[]}      The tokens of `pattern`, in order of appearance.
//
// Only the space character (' ') separates tokens. A token that contains a
// double quote extends up to its closing quote: a '"' that is followed by
// end-of-string, a space, or '$' plus end-of-string/space. Quotes that are
// followed by anything else are treated as part of the token content.
//
// If no such closing quote exists (unbalanced quotes), the token simply ends
// at the next space, so an unterminated quote never swallows later tokens.
function tokenize(pattern) {
  const tokens = [];
  const len = pattern.length;
  let i = 0;
  while (i < len) {
    // Skip the spaces separating tokens
    while (i < len && pattern[i] === ' ') i++;
    if (i >= len) break;

    // Advance past any leading characters (e.g. =, !, ^, ') up to the first
    // space or quote, to find out whether this token has a quoted part
    let j = i;
    while (j < len && pattern[j] !== ' ' && pattern[j] !== '"') j++;

    // Exclusive end index of the token; -1 while still undetermined
    let end = -1;
    if (j < len && pattern[j] === '"') {
      // Quoted token: look for the quote that really closes it
      for (let k = j + 1; k < len; k++) {
        if (pattern[k] !== '"') continue;
        const next = k + 1;
        if (next >= len || pattern[next] === ' ') {
          end = next; // include closing quote
          break;
        }
        if (pattern[next] === '$' && (next + 1 >= len || pattern[next + 1] === ' ')) {
          end = next + 1; // include "$
          break;
        }
      }
    }
    if (end === -1) {
      // Unquoted token, or a quote that is never closed: read until the next
      // space (or end of input)
      end = j;
      while (end < len && pattern[end] !== ' ') end++;
    }
    tokens.push(pattern.substring(i, end));
    i = end;
  }
  return tokens;
}
1044+
10011045
// Return a 2D array representation of the query, for simpler parsing.
10021046
// Example:
10031047
// "^core go$ | rb$ | py$ xy$" => [["^core", "go$"], ["rb$"], ["py$", "xy$"]]
10041048
function parseQuery(pattern, options = {}) {
10051049
return pattern.split(OR_TOKEN).map(item => {
1006-
const query = item.trim().split(SPACE_RE).filter(item => item && !!item.trim());
1050+
const query = tokenize(item.trim()).filter(item => item && !!item.trim());
10071051
const results = [];
10081052
for (let i = 0, len = query.length; i < len; i += 1) {
10091053
const queryItem = query[i];

src/search/extended/parseQuery.ts

Lines changed: 51 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,19 +22,64 @@ const searchers: Array<typeof BaseMatch> = [
2222

2323
const searchersLen = searchers.length
2424

25-
// Regex to split by spaces, but keep anything in quotes together
26-
const SPACE_RE = / +(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)/
2725
const OR_TOKEN = '|'
2826

27+
// Tokenize a query string into individual search terms.
28+
// Respects multi-match quoted tokens like ="said "test"" or ^"hello world"$
29+
// where inner spaces and quotes are part of the token.
30+
// @param pattern  One OR-branch of an extended-search query.
// @returns        The tokens of `pattern`, in order of appearance.
//
// Only the space character (' ') separates tokens. A token that contains a
// double quote extends up to its closing quote: a '"' that is followed by
// end-of-string, a space, or '$' plus end-of-string/space. Quotes that are
// followed by anything else are treated as part of the token content.
//
// If no such closing quote exists (unbalanced quotes), the token simply ends
// at the next space, so an unterminated quote never swallows later tokens.
function tokenize(pattern: string): string[] {
  const tokens: string[] = []
  const len = pattern.length
  let i = 0

  while (i < len) {
    // Skip the spaces separating tokens
    while (i < len && pattern[i] === ' ') i++
    if (i >= len) break

    // Advance past any leading characters (e.g. =, !, ^, ') up to the first
    // space or quote, to find out whether this token has a quoted part
    let j = i
    while (j < len && pattern[j] !== ' ' && pattern[j] !== '"') j++

    // Exclusive end index of the token; -1 while still undetermined
    let end = -1

    if (j < len && pattern[j] === '"') {
      // Quoted token: look for the quote that really closes it
      for (let k = j + 1; k < len; k++) {
        if (pattern[k] !== '"') continue
        const next = k + 1
        if (next >= len || pattern[next] === ' ') {
          end = next // include closing quote
          break
        }
        if (pattern[next] === '$' && (next + 1 >= len || pattern[next + 1] === ' ')) {
          end = next + 1 // include "$
          break
        }
      }
    }

    if (end === -1) {
      // Unquoted token, or a quote that is never closed: read until the next
      // space (or end of input)
      end = j
      while (end < len && pattern[end] !== ' ') end++
    }

    tokens.push(pattern.substring(i, end))
    i = end
  }

  return tokens
}
76+
2977
// Return a 2D array representation of the query, for simpler parsing.
3078
// Example:
3179
// "^core go$ | rb$ | py$ xy$" => [["^core", "go$"], ["rb$"], ["py$", "xy$"]]
3280
export default function parseQuery(pattern: string, options: any = {}): BaseMatch[][] {
3381
return pattern.split(OR_TOKEN).map((item) => {
34-
const query = item
35-
.trim()
36-
.split(SPACE_RE)
37-
.filter((item) => item && !!item.trim())
82+
const query = tokenize(item.trim()).filter((item) => item && !!item.trim())
3883

3984
const results: BaseMatch[] = []
4085
for (let i = 0, len = query.length; i < len; i += 1) {

test/extended-search.test.js

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,51 @@ describe('Searching using extended search', () => {
8888
})
8989
})
9090

91+
describe('Searching with quoted tokens containing spaces and inner quotes', () => {
  // Fixture: entries that differ only by spaces and embedded double quotes,
  // so each query below can single out exactly the entries it should match.
  const docs = [
    { text: 'said "test' },
    { text: 'said' },
    { text: 'test' },
    { text: 'hello world' },
    { text: 'hello "world"' }
  ]

  // Extended search over the single `text` key
  const fuse = new Fuse(docs, { useExtendedSearch: true, keys: ['text'] })

  test('Search: exact-match with inner quote', () => {
    // The inner quote belongs to the pattern; only the last quote closes it
    const hits = fuse.search('="said "test"')
    expect(hits).toHaveLength(1)
    const [first] = hits
    expect(first.item.text).toBe('said "test')
  })

  test('Search: include-match with space (\'\"hello world\")', () => {
    const hits = fuse.search('\'"hello world"')
    expect(hits).toHaveLength(1)
    const [first] = hits
    expect(first.item.text).toBe('hello world')
  })

  test('Search: prefix-match with space (^"hello w")', () => {
    const hits = fuse.search('^"hello w"')
    expect(hits).toHaveLength(1)
    const [first] = hits
    expect(first.item.text).toBe('hello world')
  })

  test('Search: suffix-match with space ("lo world"$)', () => {
    const hits = fuse.search('"lo world"$')
    expect(hits).toHaveLength(1)
    const [first] = hits
    expect(first.item.text).toBe('hello world')
  })

  test('Search: inverse-exact with space (!"hello world")', () => {
    // Four of the five fixture entries are expected to match
    const hits = fuse.search('!"hello world"')
    expect(hits).toHaveLength(4)
  })
})
135+
91136
describe('ignoreLocation when useExtendedSearch is true', () => {
92137
const list = [
93138
{

0 commit comments

Comments
 (0)