Skip to content

Commit e48d5c4

Browse files
eric-zaharia, Dmytro Sarzhan, dmytrosarzhan-mono
authored
feat(specs): query categorization (#6258)
Co-authored-by: Dmytro Sarzhan <dima@MacBook-Pro-Dmytro.local> Co-authored-by: dmytrosarzhan-mono <dmytro.sarzhan@monobank.ua>
1 parent 3f74adb commit e48d5c4

8 files changed

Lines changed: 488 additions & 0 deletions

File tree

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
name: Query Categorization events cron
2+
3+
on:
4+
schedule:
5+
- cron: '0 6 1 * *'
6+
workflow_dispatch:
7+
8+
permissions:
9+
contents: read
10+
11+
jobs:
12+
generate:
13+
name: generate QC events
14+
runs-on: ubuntu-22.04
15+
timeout-minutes: 15
16+
steps:
17+
- uses: actions/checkout@v6
18+
with:
19+
ref: main
20+
21+
- name: setup
22+
uses: ./.github/actions/setup
23+
with:
24+
type: minimal
25+
26+
- name: populate index
27+
working-directory: scripts
28+
env:
29+
ALGOLIA_APPLICATION_ID: ${{ secrets.ALGOLIA_APPLICATION_ID }}
30+
ALGOLIA_ADMIN_KEY: ${{ secrets.ALGOLIA_ADMIN_KEY }}
31+
run: yarn runScript index-maintenance/query-categorization/setupIndex.ts
32+
33+
- name: generate events
34+
working-directory: scripts
35+
env:
36+
ALGOLIA_APPLICATION_ID: ${{ secrets.ALGOLIA_APPLICATION_ID }}
37+
ALGOLIA_ADMIN_KEY: ${{ secrets.ALGOLIA_ADMIN_KEY }}
38+
run: yarn runScript index-maintenance/query-categorization/generateEvents.ts
39+
40+
- name: notify slack on failure
41+
uses: slackapi/slack-github-action@v3.0.1
42+
if: failure()
43+
with:
44+
method: chat.postMessage
45+
token: ${{ secrets.SLACK_BOT_TOKEN }}
46+
payload: |
47+
channel: ${{ secrets.SLACK_CHANNEL_ID }}
48+
text: ":alert: QC events cron failed :alert:\nhttps://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
import { randomUUID } from 'node:crypto';
2+
3+
import { insightsClient } from '@algolia/client-insights';
4+
import { algoliasearch } from 'algoliasearch';
5+
6+
import { QUERIES } from './setupIndex.ts';
// Algolia credentials. requireEnv throws while the module is being evaluated
// when either variable is missing, so the script never runs half-configured.
const APP_ID = requireEnv('ALGOLIA_APPLICATION_ID');
const ADMIN_KEY = requireEnv('ALGOLIA_ADMIN_KEY');
// Index that is searched and that events are attributed to; QC_INDEX overrides the default.
const INDEX = process.env.QC_INDEX ?? 'cts_e2e_query_categorization';

// Number of simulated sessions; each one gets its own random user token.
const ITERATIONS = 100;
// Probability that a click is followed by a conversion event.
const CONVERSION_RATE = 0.25;
// Inclusive bounds on the number of distinct queries searched per session.
const QUERIES_MIN = 3;
const QUERIES_MAX = 15;
// Number of distinct hits that receive a click for every query.
const EVENTS_PER_QUERY = 5;
// Page size requested for every search.
const HITS_PER_QUERY = 20;
// Maximum number of events sent in a single pushEvents call.
const BATCH_SIZE = 1000;
/**
 * Reads a mandatory environment variable.
 *
 * @param name - Name of the environment variable to read.
 * @returns The value of the variable.
 * @throws Error when the variable is unset or is the empty string.
 */
function requireEnv(name: string): string {
  const v = process.env[name];
  if (!v) throw new Error(`missing required env var ${name}`);
  return v;
}
/**
 * Draws a random integer from the closed interval [min, max].
 *
 * @param min - Smallest value that can be returned.
 * @param max - Largest value that can be returned.
 * @returns An integer between `min` and `max`, both included.
 */
function randomInt(min: number, max: number): number {
  return Math.floor(Math.random() * (max - min + 1)) + min;
}
/**
 * Picks `k` distinct indices at random from the range [0, n).
 *
 * Uses a partial Fisher–Yates shuffle so that every subset and ordering is
 * equally likely. Sorting with a random comparator, as was done before, gives
 * a biased result because the comparator is not consistent between calls.
 *
 * @param n - Size of the index range to draw from.
 * @param k - Number of indices wanted; when larger than `n`, all `n` indices are returned.
 * @returns The selected indices in random order, without duplicates.
 */
function pick(n: number, k: number): number[] {
  const pool = Array.from({ length: n }, (_, i) => i);
  const count = Math.max(0, Math.min(k, n));
  for (let i = 0; i < count; i++) {
    // Swap position i with a uniformly chosen position from the untouched tail.
    const j = i + Math.floor(Math.random() * (n - i));
    [pool[i], pool[j]] = [pool[j], pool[i]];
  }
  return pool.slice(0, count);
}
/**
 * Simulates search sessions against INDEX and sends the resulting click and
 * conversion events to the Insights API.
 *
 * Each of the ITERATIONS sessions uses a fresh random user token and searches a
 * random subset of QUERIES with click analytics enabled. For every query,
 * EVENTS_PER_QUERY distinct hits receive a click event, and each click is
 * followed by a conversion event with probability CONVERSION_RATE. All events
 * are collected in memory and pushed in batches of BATCH_SIZE once every
 * session has been simulated.
 *
 * @throws Error when a query returns fewer than EVENTS_PER_QUERY hits, or when
 *   the search response carries no queryID.
 */
export async function generateEvents(): Promise<void> {
  const client = algoliasearch(APP_ID, ADMIN_KEY);
  const insights = insightsClient(APP_ID, ADMIN_KEY);
  const events: Record<string, unknown>[] = [];

  for (let iteration = 0; iteration < ITERATIONS; iteration++) {
    // Progress report on every tenth session.
    if ((iteration + 1) % 10 === 0) {
      console.log(
        `session ${iteration + 1} of ${ITERATIONS} — ${events.length} click+conversion events generated so far`,
      );
    }
    const userToken = randomUUID();
    const sessionQueries = pick(QUERIES.length, randomInt(QUERIES_MIN, QUERIES_MAX)).map((i) => QUERIES[i]);

    for (const query of sessionQueries) {
      const resp = await client.searchSingleIndex({
        indexName: INDEX,
        searchParams: { query, clickAnalytics: true, hitsPerPage: HITS_PER_QUERY, userToken },
      });
      const hits = (resp.hits ?? []) as Array<{ objectID: string }>;
      const queryID = (resp as { queryID?: string }).queryID;

      if (hits.length < EVENTS_PER_QUERY) {
        throw new Error(`query '${query}' returned ${hits.length} hits; need ≥${EVENTS_PER_QUERY}`);
      }
      if (!queryID) {
        throw new Error(`query '${query}' returned no queryID — is clickAnalytics enabled?`);
      }

      for (const idx of pick(hits.length, EVENTS_PER_QUERY)) {
        // Clicks are back-dated by up to 500 ms so their timestamps are not all identical.
        const clickTs = Date.now() - randomInt(0, 500);
        events.push({
          eventType: 'click',
          eventName: 'product-clicked',
          index: INDEX,
          userToken,
          queryID,
          objectIDs: [hits[idx].objectID],
          // Positions are 1-based, hit indices are 0-based.
          positions: [idx + 1],
          timestamp: clickTs,
        });
        if (Math.random() < CONVERSION_RATE) {
          events.push({
            eventType: 'conversion',
            eventName: 'product-purchased',
            index: INDEX,
            userToken,
            queryID,
            objectIDs: [hits[idx].objectID],
            // A conversion always happens strictly after its click.
            timestamp: clickTs + randomInt(1, 100),
          });
        }
      }
    }
  }

  for (let i = 0; i < events.length; i += BATCH_SIZE) {
    await insights.pushEvents({ events: events.slice(i, i + BATCH_SIZE) as never });
  }
  console.log(`Sent ${events.length} click+conversion events on '${INDEX}' across ${ITERATIONS} sessions.`);
}
// Run the generator only when the module URL ends with the path of the executed
// script, i.e. when this file is run directly rather than imported.
if (import.meta.url.endsWith(process.argv[1])) {
  await generateEvents();
}
Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
import { algoliasearch } from 'algoliasearch';
// Algolia credentials. requireEnv throws while the module is being evaluated
// when either variable is missing.
const APP_ID = requireEnv('ALGOLIA_APPLICATION_ID');
const ADMIN_KEY = requireEnv('ALGOLIA_ADMIN_KEY');
// Index to populate; QC_INDEX overrides the default.
const INDEX = process.env.QC_INDEX ?? 'cts_e2e_query_categorization';

// Number of records generated for every query string.
const RECORDS_PER_QUERY = 12;
/** A second-level category together with the search queries that belong to it. */
type CategoryGroup = { lvl1: string; lvl2: string; queries: string[] };

/**
 * Query strings grouped by category. `lvl1` and `lvl2` name the first and
 * second category level below the shared 'Furniture' root.
 */
export const CATEGORY_QUERIES: CategoryGroup[] = [
  {
    lvl1: 'Living Room',
    lvl2: 'Sofas',
    queries: [
      'sofa',
      'couch',
      'loveseat',
      'sectional',
      'sleeper sofa',
      'reclining sofa',
      'leather sofa',
      'velvet sofa',
    ],
  },
  {
    lvl1: 'Living Room',
    lvl2: 'Chairs',
    queries: [
      'armchair',
      'accent chair',
      'rocking chair',
      'club chair',
      'wingback chair',
      'swivel chair',
      'papasan chair',
    ],
  },
  {
    lvl1: 'Living Room',
    lvl2: 'Lamps',
    queries: ['lamp', 'floor lamp', 'table lamp', 'reading lamp', 'arc lamp', 'tripod lamp'],
  },
  {
    lvl1: 'Living Room',
    lvl2: 'Tables',
    queries: ['coffee table', 'side table', 'console table', 'end table', 'accent table'],
  },
  {
    lvl1: 'Living Room',
    lvl2: 'Storage',
    queries: ['bookshelf', 'tv stand', 'media console', 'display cabinet', 'bookcase'],
  },
  {
    lvl1: 'Dining Room',
    lvl2: 'Tables',
    queries: ['dining table', 'kitchen table', 'pub table', 'round table', 'extending table'],
  },
  {
    lvl1: 'Dining Room',
    lvl2: 'Chairs',
    queries: ['dining chair', 'bar stool', 'counter stool', 'dining bench', 'banquette'],
  },
  { lvl1: 'Dining Room', lvl2: 'Storage', queries: ['buffet', 'sideboard', 'china cabinet', 'hutch', 'wine cabinet'] },
  {
    lvl1: 'Office',
    lvl2: 'Desks',
    queries: [
      'desk',
      'standing desk',
      'writing desk',
      'executive desk',
      'computer desk',
      'corner desk',
      'secretary desk',
    ],
  },
  {
    lvl1: 'Office',
    lvl2: 'Chairs',
    queries: ['office chair', 'ergonomic chair', 'task chair', 'drafting chair', 'mesh chair'],
  },
  { lvl1: 'Office', lvl2: 'Storage', queries: ['filing cabinet', 'credenza', 'office shelf', 'lateral file'] },
  {
    lvl1: 'Bedroom',
    lvl2: 'Beds',
    queries: ['bed frame', 'platform bed', 'canopy bed', 'daybed', 'bunk bed', 'king bed', 'queen bed', 'storage bed'],
  },
  {
    lvl1: 'Bedroom',
    lvl2: 'Storage',
    queries: ['dresser', 'nightstand', 'wardrobe', 'armoire', 'chest of drawers', 'bedroom bench'],
  },
  { lvl1: 'Bedroom', lvl2: 'Accessories', queries: ['mattress', 'headboard', 'vanity', 'jewelry box', 'hope chest'] },
  {
    lvl1: 'Outdoor',
    lvl2: 'Seating',
    queries: ['patio chair', 'outdoor sofa', 'lounge chair', 'hammock', 'adirondack chair', 'porch swing'],
  },
  { lvl1: 'Outdoor', lvl2: 'Tables', queries: ['patio table', 'outdoor dining table', 'garden table', 'bistro table'] },
  { lvl1: 'Outdoor', lvl2: 'Accessories', queries: ['fire pit', 'patio umbrella', 'outdoor bench'] },
  { lvl1: 'Kids Room', lvl2: 'Beds', queries: ['toddler bed', 'kids bed', 'crib', 'trundle bed'] },
  {
    lvl1: 'Kids Room',
    lvl2: 'Furniture',
    queries: ['kids desk', 'play table', 'toy chest', 'kids chair', 'changing table'],
  },
];

/** Every query string from CATEGORY_QUERIES, flattened in declaration order. */
export const QUERIES: string[] = CATEGORY_QUERIES.flatMap((g) => g.queries);
/**
 * Reads a mandatory environment variable.
 *
 * @param name - Name of the environment variable to read.
 * @returns The value of the variable.
 * @throws Error when the variable is unset or is the empty string.
 */
function requireEnv(name: string): string {
  const v = process.env[name];
  if (!v) throw new Error(`missing required env var ${name}`);
  return v;
}
/**
 * Builds the records to index: RECORDS_PER_QUERY records for every query in
 * CATEGORY_QUERIES.
 *
 * Each record's title is the title-cased query followed by a model number, so
 * the query string matches its own records. The objectID is the query with
 * spaces replaced by underscores plus the model number.
 *
 * @returns The generated records, grouped by query in declaration order.
 */
function buildRecords(): Array<Record<string, unknown>> {
  const records: Array<Record<string, unknown>> = [];
  for (const { lvl1, lvl2, queries } of CATEGORY_QUERIES) {
    for (const query of queries) {
      const objectIDBase = query.replace(/ /g, '_');
      // Upper-case the first letter of every word.
      const title = query.replace(/\b\w/g, (c) => c.toUpperCase());
      for (let i = 0; i < RECORDS_PER_QUERY; i++) {
        records.push({
          objectID: `${objectIDBase}-${i}`,
          title: `${title} model ${i}`,
          // Deterministic price between 50 and 449, varying with the model number.
          price: 50 + ((i * 17) % 400),
          hierarchicalCategories: {
            lvl0: 'Furniture',
            lvl1: `Furniture > ${lvl1}`,
            lvl2: `Furniture > ${lvl1} > ${lvl2}`,
          },
        });
      }
    }
  }
  return records;
}
/**
 * Rebuilds INDEX from scratch: clears it, uploads the generated records and
 * configures the searchable attribute and the category facets.
 *
 * The function resolves only after the settings task has completed, so a
 * caller that searches the index right afterwards sees the final configuration.
 */
export async function setupIndex(): Promise<void> {
  const records = buildRecords();
  const client = algoliasearch(APP_ID, ADMIN_KEY);

  await client.clearObjects({ indexName: INDEX });
  await client.saveObjects({ indexName: INDEX, objects: records, waitForTasks: true });
  const { taskID } = await client.setSettings({
    indexName: INDEX,
    indexSettings: {
      searchableAttributes: ['title'],
      attributesForFaceting: [
        'hierarchicalCategories.lvl0',
        'hierarchicalCategories.lvl1',
        'hierarchicalCategories.lvl2',
      ],
    },
  });
  // setSettings only enqueues the change; wait until the engine has applied it.
  await client.waitForTask({ indexName: INDEX, taskID });

  console.log(
    `Populated '${INDEX}' with ${records.length} records across ${QUERIES.length} unique queries and configured facets.`,
  );
}
// Populate the index only when the module URL ends with the path of the executed
// script, i.e. when this file is run directly rather than imported.
if (import.meta.url.endsWith(process.argv[1])) {
  await setupIndex();
}

scripts/knip.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
"ci/actions/restore-artifacts/src/index.ts",
1010
"ci/githubActions/createMatrix.ts",
1111
"ci/githubActions/setRunVariables.ts",
12+
"index-maintenance/query-categorization/setupIndex.ts",
13+
"index-maintenance/query-categorization/generateEvents.ts",
1214
"configReplacer.cjs",
1315
"husky/pre-commit.mjs",
1416
"release/createReleasePR.ts"

scripts/package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
"@actions/core": "3.0.0",
2323
"@actions/exec": "3.0.0",
2424
"@actions/io": "3.0.2",
25+
"@algolia/client-insights": "5.50.2",
2526
"@har-sdk/oas": "2.12.2",
2627
"@octokit/rest": "22.0.1",
2728
"@readme/httpsnippet": "11.1.0",
@@ -32,6 +33,7 @@
3233
"@types/node": "24.12.2",
3334
"@types/semver": "7.7.1",
3435
"@types/spinnies": "0.5.3",
36+
"algoliasearch": "5.50.2",
3537
"chai": "6.2.2",
3638
"chalk": "5.6.2",
3739
"commander": "14.0.2",

specs/search/common/schemas/SearchQuery.yml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ searchForHitsOptions:
5151
$ref: '../../../common/parameters.yml#/indexName'
5252
type:
5353
$ref: '#/searchTypeDefault'
54+
extensions:
55+
$ref: '#/searchExtensions'
5456
required:
5557
- indexName
5658

@@ -69,3 +71,25 @@ searchTypeDefault:
6971
description: |
7072
- `default`: perform a search query
7173
- `facet` [searches for facet values](https://www.algolia.com/doc/guides/managing-results/refine-results/faceting/#search-for-facet-values).
74+
75+
searchExtensions:
76+
type: object
77+
description: |
78+
Additional parameters for Algolia AI features.
79+
Used to enable [Query Categorization](https://www.algolia.com/doc/guides/algolia-ai/query-categorization/) and other AI-powered capabilities.
80+
properties:
81+
queryCategorization:
82+
$ref: '#/searchExtensionsQueryCategorization'
83+
84+
searchExtensionsQueryCategorization:
85+
type: object
86+
description: Parameters for the [Query Categorization](https://www.algolia.com/doc/guides/algolia-ai/query-categorization/) AI feature.
87+
properties:
88+
enableCategoriesRetrieval:
89+
type: boolean
90+
description: Whether to retrieve category predictions in the response `extensions.queryCategorization` field.
91+
default: false
92+
enableAutoFiltering:
93+
type: boolean
94+
description: Whether to automatically apply category-based filters and boosts to search results.
95+
default: false

0 commit comments

Comments
 (0)