From e4696b794a94dcf24de17f30f3b6c7bd702d2349 Mon Sep 17 00:00:00 2001 From: dacharyc Date: Sat, 2 May 2026 12:09:07 -0400 Subject: [PATCH 1/2] Fix sitemap discovery regression + related slow tests --- src/helpers/get-page-urls.ts | 9 +- test/helpers/mock-sitemap-not-found.ts | 6 + test/integration/check-pipeline.test.ts | 2 + .../integration/cross-check-contracts.test.ts | 2 + test/integration/dependency-chains.test.ts | 2 + test/integration/scoring-pipeline.test.ts | 4 + .../checks/content-start-position.test.ts | 1 + test/unit/checks/llms-txt-coverage.test.ts | 19 +-- test/unit/checks/markdown-url-support.test.ts | 1 + test/unit/checks/page-size-html.test.ts | 1 + test/unit/checks/page-size-markdown.test.ts | 2 + test/unit/helpers/get-page-urls.test.ts | 112 ++++++++++++++++++ test/unit/runner.test.ts | 4 + 13 files changed, 148 insertions(+), 17 deletions(-) diff --git a/src/helpers/get-page-urls.ts b/src/helpers/get-page-urls.ts index 30430b3..96bf66a 100644 --- a/src/helpers/get-page-urls.ts +++ b/src/helpers/get-page-urls.ts @@ -328,14 +328,21 @@ async function discoverSitemapUrls(ctx: CheckContext, originOverride?: string): // Build fallback candidates: origin-level sitemap first, then subpath sitemaps // when the base URL has a non-root path (e.g. swagger.io/docs/). + // Both `sitemap-index.xml` (hyphen) and `sitemap_index.xml` (underscore) are + // observed in the wild; e.g. Document360's CMS emits the underscore form. const fallbackOrigin = originOverride ?? ctx.origin; - const candidates = [`${fallbackOrigin}/sitemap.xml`]; + const candidates = [ + `${fallbackOrigin}/sitemap.xml`, + `${fallbackOrigin}/sitemap-index.xml`, + `${fallbackOrigin}/sitemap_index.xml`, + ]; const baseUrlPath = new URL(ctx.baseUrl).pathname.replace(/\/$/, ''); if (baseUrlPath && baseUrlPath !== '') { const subpathBase = `${fallbackOrigin}${baseUrlPath}`; candidates.push(`${subpathBase}/sitemap.xml`); candidates.push(`${subpathBase}/sitemap-index.xml`); + candidates.push(`${subpathBase}/sitemap_index.xml`); } return candidates; diff --git a/test/helpers/mock-sitemap-not-found.ts b/test/helpers/mock-sitemap-not-found.ts index 2b30fda..63e24bc 100644 --- a/test/helpers/mock-sitemap-not-found.ts +++ b/test/helpers/mock-sitemap-not-found.ts @@ -14,6 +14,8 @@ export function mockSitemapNotFound(server: SetupServerApi, baseUrl: string): vo const handlers = [ http.get(`${parsed.origin}/robots.txt`, () => new HttpResponse('', { status: 404 })), http.get(`${parsed.origin}/sitemap.xml`, () => new HttpResponse('', { status: 404 })), + http.get(`${parsed.origin}/sitemap-index.xml`, () => new HttpResponse('', { status: 404 })), + http.get(`${parsed.origin}/sitemap_index.xml`, () => new HttpResponse('', { status: 404 })), ]; const subpath = parsed.pathname.replace(/\/$/, ''); if (subpath && subpath !== '') { @@ -26,6 +28,10 @@ export function mockSitemapNotFound(server: SetupServerApi, baseUrl: string): vo `${parsed.origin}${subpath}/sitemap-index.xml`, () => new HttpResponse('', { status: 404 }), ), + http.get( + `${parsed.origin}${subpath}/sitemap_index.xml`, + () => new HttpResponse('', { status: 404 }), + ), ); } server.use(...handlers); diff --git a/test/integration/check-pipeline.test.ts b/test/integration/check-pipeline.test.ts index 7beb6db..d6778a9 100644 --- a/test/integration/check-pipeline.test.ts +++ b/test/integration/check-pipeline.test.ts @@ -56,6 +56,8 @@ function setupSite( handlers.push( http.get(`http://${host}/robots.txt`, () => new HttpResponse('', { status: 404 })), http.get(`http://${host}/sitemap.xml`, () => new HttpResponse('', { status: 404 })), + http.get(`http://${host}/sitemap-index.xml`, () => new HttpResponse('', { status: 404 })), + http.get(`http://${host}/sitemap_index.xml`, () => new HttpResponse('', { status: 404 })), ); const defaultCacheHeaders = opts.cacheControl ? { 'Cache-Control': opts.cacheControl } : {}; diff --git a/test/integration/cross-check-contracts.test.ts b/test/integration/cross-check-contracts.test.ts index 7ab9170..ba383a1 100644 --- a/test/integration/cross-check-contracts.test.ts +++ b/test/integration/cross-check-contracts.test.ts @@ -150,6 +150,7 @@ describe('previousResults safety: checks handle missing dependencies gracefully' const ctx = createContext(`http://${host}`, { requestDelay: 0 }); // No llms-txt-exists in previousResults, no llms.txt + mockSitemapNotFound(server, `http://${host}`); server.use( http.get(`http://${host}/llms.txt`, () => new HttpResponse(null, { status: 404 })), http.get(`http://${host}/docs/llms.txt`, () => new HttpResponse(null, { status: 404 })), @@ -368,6 +369,7 @@ describe('cross-check field contracts: empty/missing upstream details', () => { details: { discoveredFiles: [] }, }); + mockSitemapNotFound(server, `http://${host}`); server.use( http.get(`http://${host}/llms.txt`, () => new HttpResponse(null, { status: 404 })), http.get(`http://${host}/docs/llms.txt`, () => new HttpResponse(null, { status: 404 })), diff --git a/test/integration/dependency-chains.test.ts b/test/integration/dependency-chains.test.ts index d1f90bd..f2ac05b 100644 --- a/test/integration/dependency-chains.test.ts +++ b/test/integration/dependency-chains.test.ts @@ -48,6 +48,8 @@ function setupSite( http.get(`http://${host}/docs/llms.txt`, () => new HttpResponse(null, { status: 404 })), http.get(`http://${host}/robots.txt`, () => new HttpResponse('', { status: 404 })), http.get(`http://${host}/sitemap.xml`, () => new HttpResponse('', { status: 404 })), + http.get(`http://${host}/sitemap-index.xml`, () => new HttpResponse('', { status: 404 })), + http.get(`http://${host}/sitemap_index.xml`, () => new HttpResponse('', { status: 404 })), ); for (const page of opts.pages) { diff --git a/test/integration/scoring-pipeline.test.ts b/test/integration/scoring-pipeline.test.ts index 1c7aea6..5aa4f50 100644 --- a/test/integration/scoring-pipeline.test.ts +++ b/test/integration/scoring-pipeline.test.ts @@ -90,6 +90,10 @@ function setupSite( http.get(`http://${host}/sitemap.xml`, () => new HttpResponse('', { status: 404 })), ); } + handlers.push( + http.get(`http://${host}/sitemap-index.xml`, () => new HttpResponse('', { status: 404 })), + http.get(`http://${host}/sitemap_index.xml`, () => new HttpResponse('', { status: 404 })), + ); // Root URL for homepage-based discovery const pageLinks = opts.pages diff --git a/test/unit/checks/content-start-position.test.ts b/test/unit/checks/content-start-position.test.ts index ba9e9da..964c067 100644 --- a/test/unit/checks/content-start-position.test.ts +++ b/test/unit/checks/content-start-position.test.ts @@ -480,6 +480,7 @@ describe('content-start-position', () => { // ── Fallback to baseUrl ── it('falls back to baseUrl when no llms.txt', async () => { + mockSitemapNotFound(server, 'http://csp-fb.local'); server.use( http.get( 'http://csp-fb.local/llms.txt', diff --git a/test/unit/checks/llms-txt-coverage.test.ts b/test/unit/checks/llms-txt-coverage.test.ts index c58e891..1a6b6df 100644 --- a/test/unit/checks/llms-txt-coverage.test.ts +++ b/test/unit/checks/llms-txt-coverage.test.ts @@ -4,6 +4,7 @@ import { setupServer } from 'msw/node'; import { getCheck } from '../../../src/checks/registry.js'; import { createContext } from '../../../src/runner.js'; import type { DiscoveredFile } from '../../../src/types.js'; +import { mockSitemapNotFound } from '../../helpers/mock-sitemap-not-found.js'; import { hasLocaleCodeAt, filterToUnprefixedLocale, @@ -288,15 +289,7 @@ describe('llms-txt-coverage', () => { const host = 'cov-no-sitemap.local'; const ctx = makeCtx(host, [`http://${host}/docs/page`], '/docs'); - server.use( - http.get(`http://${host}/robots.txt`, () => new HttpResponse('', { status: 404 })), - http.get(`http://${host}/sitemap.xml`, () => new HttpResponse('', { status: 404 })), - http.get(`http://${host}/docs/sitemap.xml`, () => new HttpResponse('', { status: 404 })), - http.get( - `http://${host}/docs/sitemap-index.xml`, - () => new HttpResponse('', { status: 404 }), - ), - ); + mockSitemapNotFound(server, `http://${host}/docs`); const result = await check.run(ctx); expect(result.status).toBe('skip'); @@ -527,10 +520,8 @@ describe('llms-txt-coverage', () => { const ctx = makeCtx(host, docPages, '/docs'); + mockSitemapNotFound(server, `http://${host}/docs`); server.use( - // No main sitemap - http.get(`http://${host}/robots.txt`, () => new HttpResponse('', { status: 404 })), - http.get(`http://${host}/sitemap.xml`, () => new HttpResponse('', { status: 404 })), // Docs sitemap is an index http.get( `http://${host}/docs/sitemap.xml`, @@ -548,10 +539,6 @@ describe('llms-txt-coverage', () => { headers: { 'content-type': 'application/xml' }, }), ), - http.get( - `http://${host}/docs/sitemap-index.xml`, - () => new HttpResponse('', { status: 404 }), - ), ); const result = await check.run(ctx); diff --git a/test/unit/checks/markdown-url-support.test.ts b/test/unit/checks/markdown-url-support.test.ts index d4c2f4f..b8dc754 100644 --- a/test/unit/checks/markdown-url-support.test.ts +++ b/test/unit/checks/markdown-url-support.test.ts @@ -603,6 +603,7 @@ describe('markdown-url-support', () => { // false-positive the check for a /auth/index.html page. it('does not test /foo.md when /foo/index.html came from sitemap (issue #77 isolation)', async () => { const requestLog: string[] = []; + mockSitemapNotFound(server, 'http://parentclean.local'); server.use( http.get('http://parentclean.local/robots.txt', () => new HttpResponse('', { status: 404 })), http.get( diff --git a/test/unit/checks/page-size-html.test.ts b/test/unit/checks/page-size-html.test.ts index 58dea4d..f6116e0 100644 --- a/test/unit/checks/page-size-html.test.ts +++ b/test/unit/checks/page-size-html.test.ts @@ -308,6 +308,7 @@ describe('page-size-html', () => { }); it('falls back to baseUrl when no llms.txt', async () => { + mockSitemapNotFound(server, 'http://ps-html-fb.local'); server.use( http.get( 'http://ps-html-fb.local/llms.txt', diff --git a/test/unit/checks/page-size-markdown.test.ts b/test/unit/checks/page-size-markdown.test.ts index 211aa37..77a9605 100644 --- a/test/unit/checks/page-size-markdown.test.ts +++ b/test/unit/checks/page-size-markdown.test.ts @@ -147,6 +147,7 @@ describe('page-size-markdown', () => { it('works in standalone mode when dependencies never ran', async () => { mockNoLlmsTxt('ps-md-standalone.local'); + mockSitemapNotFound(server, 'http://ps-md-standalone.local'); server.use( http.get( 'http://ps-md-standalone.local/robots.txt', @@ -187,6 +188,7 @@ describe('page-size-markdown', () => { it('skips in standalone mode when no markdown found', async () => { mockNoLlmsTxt('ps-md-nomd.local'); + mockSitemapNotFound(server, 'http://ps-md-nomd.local'); server.use( http.get('http://ps-md-nomd.local/robots.txt', () => new HttpResponse('', { status: 404 })), http.get('http://ps-md-nomd.local/sitemap.xml', () => new HttpResponse('', { status: 404 })), diff --git a/test/unit/helpers/get-page-urls.test.ts b/test/unit/helpers/get-page-urls.test.ts index 1ad7933..6b1d252 100644 --- a/test/unit/helpers/get-page-urls.test.ts +++ b/test/unit/helpers/get-page-urls.test.ts @@ -719,6 +719,7 @@ describe('getPageUrls', () => { }); it('fetches and parses sitemap.xml when no llms.txt links', async () => { + mockSitemapNotFound(server, 'http://sitemap-test.local'); server.use( http.get( 'http://sitemap-test.local/robots.txt', @@ -748,6 +749,7 @@ describe('getPageUrls', () => { }); it('handles sitemap index files (follows sub-sitemaps)', async () => { + mockSitemapNotFound(server, 'http://index-test.local'); server.use( http.get('http://index-test.local/robots.txt', () => new HttpResponse('', { status: 404 })), http.get( @@ -784,6 +786,7 @@ describe('getPageUrls', () => { }); it('filters sitemap URLs to same-origin only', async () => { + mockSitemapNotFound(server, 'http://origin-test.local'); server.use( http.get('http://origin-test.local/robots.txt', () => new HttpResponse('', { status: 404 })), http.get( @@ -810,6 +813,7 @@ describe('getPageUrls', () => { }); it('falls back to baseUrl when both llms.txt and sitemap are empty', async () => { + mockSitemapNotFound(server, 'http://empty-test.local'); server.use( http.get('http://empty-test.local/robots.txt', () => new HttpResponse('', { status: 404 })), http.get( @@ -825,6 +829,7 @@ describe('getPageUrls', () => { }); it('handles malformed sitemap XML gracefully', async () => { + mockSitemapNotFound(server, 'http://bad-xml.local'); server.use( http.get('http://bad-xml.local/robots.txt', () => new HttpResponse('', { status: 404 })), http.get( @@ -848,6 +853,7 @@ describe('getPageUrls', () => { (_, i) => ` http://big-sitemap.local/page/${i}`, ).join('\n'); + mockSitemapNotFound(server, 'http://big-sitemap.local'); server.use( http.get('http://big-sitemap.local/robots.txt', () => new HttpResponse('', { status: 404 })), http.get( @@ -878,6 +884,7 @@ describe('getPageUrls', () => { (_, i) => ` http://cap-prefix.local/en/6.0/page/${i}`, ).join('\n'); + mockSitemapNotFound(server, 'http://cap-prefix.local/en/6.0'); server.use( http.get('http://cap-prefix.local/robots.txt', () => new HttpResponse('', { status: 404 })), http.get( @@ -943,6 +950,7 @@ describe('getPageUrls', () => { .map((u) => ` ${u}`) .join('\n'); + mockSitemapNotFound(server, 'http://ver-dedup.local'); server.use( http.get('http://ver-dedup.local/robots.txt', () => new HttpResponse('', { status: 404 })), http.get( @@ -966,6 +974,7 @@ describe('getPageUrls', () => { it('filters sitemap index to default locale, skipping non-English sub-sitemaps (#30)', async () => { // Django-like sitemap index: 12 locale sitemaps, only en should be fetched + mockSitemapNotFound(server, 'http://locale-idx.local'); server.use( http.get('http://locale-idx.local/robots.txt', () => new HttpResponse('', { status: 404 })), http.get( @@ -1006,6 +1015,7 @@ describe('getPageUrls', () => { }); it('handles sitemap fetch network errors gracefully', async () => { + mockSitemapNotFound(server, 'http://net-err.local'); server.use( http.get('http://net-err.local/robots.txt', () => new HttpResponse('', { status: 404 })), http.get('http://net-err.local/sitemap.xml', () => HttpResponse.error()), @@ -1085,6 +1095,7 @@ describe('getPageUrls', () => { }); it('falls back to /sitemap.xml when robots.txt has no Sitemap directive', async () => { + mockSitemapNotFound(server, 'http://no-directive.local'); server.use( http.get( 'http://no-directive.local/robots.txt', @@ -1107,6 +1118,95 @@ describe('getPageUrls', () => { expect(result.urls).toEqual(['http://no-directive.local/page']); }); + it('falls back to /sitemap-index.xml at root when robots.txt lacks Sitemap directive and /sitemap.xml is absent', async () => { + server.use( + http.get( + 'http://root-index.local/robots.txt', + () => new HttpResponse('User-agent: *\nAllow: /\n', { status: 200 }), + ), + http.get('http://root-index.local/sitemap.xml', () => new HttpResponse('', { status: 404 })), + http.get( + 'http://root-index.local/sitemap_index.xml', + () => new HttpResponse('', { status: 404 }), + ), + http.get( + 'http://root-index.local/sitemap-index.xml', + () => + new HttpResponse( + ` + + http://root-index.local/sitemap-0.xml +`, + { status: 200, headers: { 'Content-Type': 'application/xml' } }, + ), + ), + http.get( + 'http://root-index.local/sitemap-0.xml', + () => + new HttpResponse( + ` + http://root-index.local/page-a + http://root-index.local/page-b +`, + { status: 200, headers: { 'Content-Type': 'application/xml' } }, + ), + ), + ); + + const ctx = makeCtx('http://root-index.local'); + const result = await getPageUrls(ctx); + expect(result.urls).toEqual([ + 'http://root-index.local/page-a', + 'http://root-index.local/page-b', + ]); + }); + + it('falls back to /sitemap_index.xml (underscore variant) at root when other candidates 404', async () => { + server.use( + http.get( + 'http://underscore-index.local/robots.txt', + () => new HttpResponse('User-agent: *\nAllow: /\n', { status: 200 }), + ), + http.get( + 'http://underscore-index.local/sitemap.xml', + () => new HttpResponse('', { status: 404 }), + ), + http.get( + 'http://underscore-index.local/sitemap-index.xml', + () => new HttpResponse('', { status: 404 }), + ), + http.get( + 'http://underscore-index.local/sitemap_index.xml', + () => + new HttpResponse( + ` + + http://underscore-index.local/sitemap-en.xml +`, + { status: 200, headers: { 'Content-Type': 'application/xml' } }, + ), + ), + http.get( + 'http://underscore-index.local/sitemap-en.xml', + () => + new HttpResponse( + ` + http://underscore-index.local/article-1 + http://underscore-index.local/article-2 +`, + { status: 200, headers: { 'Content-Type': 'application/xml' } }, + ), + ), + ); + + const ctx = makeCtx('http://underscore-index.local'); + const result = await getPageUrls(ctx); + expect(result.urls).toEqual([ + 'http://underscore-index.local/article-1', + 'http://underscore-index.local/article-2', + ]); + }); + it('warns and skips gzipped sitemap from robots.txt', async () => { server.use( http.get( @@ -1124,6 +1224,7 @@ describe('getPageUrls', () => { }); it('warns and skips gzipped sub-sitemap from sitemap index', async () => { + mockSitemapNotFound(server, 'http://gz-index.local'); server.use( http.get('http://gz-index.local/robots.txt', () => new HttpResponse('', { status: 404 })), http.get( @@ -1446,6 +1547,7 @@ describe('getPageUrls', () => { it('fetches llms.txt directly when llms-txt-exists has not run', async () => { const llmsTxt = `# Docs\n> Summary\n## Links\n- [Intro](http://direct-llms.local/docs/intro): Intro\n- [Guide](http://direct-llms.local/docs/guide): Guide\n`; + mockSitemapNotFound(server, 'http://direct-llms.local'); server.use( http.get( 'http://direct-llms.local/llms.txt', @@ -1470,6 +1572,7 @@ describe('getPageUrls', () => { }); it('skips llms.txt with non-text content-type in standalone mode', async () => { + mockSitemapNotFound(server, 'http://nontext-llms.local'); server.use( http.get( 'http://nontext-llms.local/llms.txt', @@ -1497,6 +1600,7 @@ describe('getPageUrls', () => { }); it('skips llms.txt that returns HTML in standalone mode', async () => { + mockSitemapNotFound(server, 'http://html-llms.local'); server.use( http.get( 'http://html-llms.local/llms.txt', @@ -1520,6 +1624,7 @@ describe('getPageUrls', () => { }); it('skips empty llms.txt in standalone mode', async () => { + mockSitemapNotFound(server, 'http://empty-llms.local'); server.use( http.get( 'http://empty-llms.local/llms.txt', @@ -1540,6 +1645,7 @@ describe('getPageUrls', () => { }); it('handles llms.txt fetch errors gracefully in standalone mode', async () => { + mockSitemapNotFound(server, 'http://err-llms.local'); server.use( http.get('http://err-llms.local/llms.txt', () => HttpResponse.error()), http.get('http://err-llms.local/docs/llms.txt', () => HttpResponse.error()), @@ -1612,6 +1718,7 @@ describe('getPageUrls', () => { }); it('scopes sitemap URLs to the baseUrl path prefix', async () => { + mockSitemapNotFound(server, 'http://sitemap-scope.local/docs'); server.use( http.get( 'http://sitemap-scope.local/robots.txt', @@ -1651,6 +1758,7 @@ describe('getPageUrls', () => { it('discovers sitemap at docs subpath when origin-level sitemap is empty (#32)', async () => { // Simulate Swagger UI: robots.txt 404, /sitemap.xml 404, but /docs/sitemap-index.xml exists + mockSitemapNotFound(server, 'http://subpath-sm.local/docs'); server.use( http.get('http://subpath-sm.local/robots.txt', () => new HttpResponse('', { status: 404 })), http.get( @@ -1786,6 +1894,7 @@ describe('getPageUrls', () => { }); it('explicit preferredLocale applies to sitemap index filtering', async () => { + mockSitemapNotFound(server, 'http://opt-sitemap-locale.local'); server.use( http.get( 'http://opt-sitemap-locale.local/robots.txt', @@ -1829,6 +1938,7 @@ describe('getPageUrls', () => { }); it('skipRefinement returns unfiltered sitemap URLs', async () => { + mockSitemapNotFound(server, 'http://skip-refine.local'); server.use( http.get('http://skip-refine.local/robots.txt', () => new HttpResponse('', { status: 404 })), http.get( @@ -1886,6 +1996,7 @@ describe('getPageUrls', () => { }); it('respects maxUrls cap when following sitemap index sub-sitemaps', async () => { + mockSitemapNotFound(server, 'http://cap-index.local'); server.use( http.get('http://cap-index.local/robots.txt', () => new HttpResponse('', { status: 404 })), http.get( @@ -2214,6 +2325,7 @@ describe('originalMdUrls (issue #77)', () => { message: 'No llms.txt', details: { discoveredFiles: [] }, }); + mockSitemapNotFound(server, 'http://test77d.local'); server.use( http.get('http://test77d.local/robots.txt', () => new HttpResponse('', { status: 404 })), http.get( diff --git a/test/unit/runner.test.ts b/test/unit/runner.test.ts index 80b7fab..043480a 100644 --- a/test/unit/runner.test.ts +++ b/test/unit/runner.test.ts @@ -4,6 +4,7 @@ import { setupServer } from 'msw/node'; import { createContext, normalizeUrl, runChecks } from '../../src/runner.js'; import { registerCheck } from '../../src/checks/registry.js'; import '../../src/checks/index.js'; +import { mockSitemapNotFound } from '../helpers/mock-sitemap-not-found.js'; const server = setupServer(); @@ -206,6 +207,7 @@ describe('runner', () => { // page-size-markdown depends on [['markdown-url-support', 'content-negotiation']] // When neither dependency runs (filtered out), page-size-markdown should still run // rather than being skipped. + mockSitemapNotFound(server, 'http://standalone.local'); server.use( http.get('http://standalone.local/llms.txt', () => new HttpResponse(null, { status: 404 })), http.get( @@ -429,6 +431,7 @@ describe('runner', () => { }); it('includes timestamp and url in report', async () => { + mockSitemapNotFound(server, 'http://meta.local'); server.use( http.get('http://meta.local/llms.txt', () => new HttpResponse(null, { status: 404 })), http.get('http://meta.local/docs/llms.txt', () => new HttpResponse(null, { status: 404 })), @@ -456,6 +459,7 @@ describe('runner', () => { }); it('includes discoverySources in report when page discovery runs', async () => { + mockSitemapNotFound(server, 'http://sources.local'); server.use( http.get('http://sources.local/llms.txt', () => HttpResponse.text('# Docs\n## Links\n- [A](http://sources.local/docs/a): A\n'), From 209f0d977ef50a84906004f12a3d87381474b4c1 Mon Sep 17 00:00:00 2001 From: dacharyc Date: Sat, 2 May 2026 12:11:00 -0400 Subject: [PATCH 2/2] Fix slow discovery test --- test/integration/scoring-pipeline.test.ts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/integration/scoring-pipeline.test.ts b/test/integration/scoring-pipeline.test.ts index 5aa4f50..875ade1 100644 --- a/test/integration/scoring-pipeline.test.ts +++ b/test/integration/scoring-pipeline.test.ts @@ -419,6 +419,12 @@ describe('scoring pipeline: resolutions populated for real check failures', () = it('each failing check produces a resolution string', async () => { const { pages } = makePages(host, 6); setupSite(host, { pages, cacheControl: 'max-age=300' }); + // No llms.txt or sitemap → discovery falls back to baseUrl, and + // markdown-url-support probes baseUrl's .md candidates. + server.use( + http.get(`http://${host}/.md`, () => new HttpResponse(null, { status: 404 })), + http.get(`http://${host}/index.md`, () => new HttpResponse(null, { status: 404 })), + ); const report = await runChecks(`http://${host}`, { requestDelay: 0,