forked from hed-standard/hed-python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlychee.toml
More file actions
90 lines (72 loc) · 3.09 KB
/
lychee.toml
File metadata and controls
90 lines (72 loc) · 3.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# Lychee link checker configuration
# This file configures how lychee validates links in the repository
#
# For more information about lychee configuration, see:
# https://github.com/lycheeverse/lychee#configuration
#
# This configuration addresses false positive errors from the link checker:
# - 403 Forbidden: Some sites (npmjs.com, mathworks.com, doi.org) block automated
# requests but work fine in browsers
# - 429 Too Many Requests: Rate limiting doesn't mean the link is broken
# Accept these HTTP status codes as valid
# 200: OK (standard success)
# 204: No Content (valid response with no body)
# 206: Partial Content (valid for range requests)
# 403: Forbidden (some sites block automated requests but links work in browsers)
# 429: Too Many Requests (rate limiting, link may be valid)
# Trade-off: because 403 and 429 are accepted, any link answering with either
# status passes the check, including links that are genuinely inaccessible.
accept = [200, 204, 206, 403, 429]
# Timeout for each request, in seconds
timeout = 30
# Maximum number of retries per link
max_retries = 3
# Maximum number of concurrent network requests
# NOTE(review): presumably kept low to limit 429 rate-limit responses — confirm.
max_concurrency = 8
# User agent string to use for requests
# Some sites require a browser-like user agent to avoid blocking
# NOTE(review): this pins a Firefox 100 on Linux identity; it may need refreshing
# if sites begin rejecting older browser versions — confirm.
user_agent = "Mozilla/5.0 (X11; Linux x86_64; rv:100.0) Gecko/20100101 Firefox/100.0"
# Exclude patterns - regex patterns for file paths to exclude from checking
exclude_path = [
    # Sphinx theme template files with Jinja2 variables that shouldn't be checked.
    # This single pattern matches every file whose name ends in -macros.html,
    # which includes webpack-macros.html and sbt-webpack-macros.html.
    '.*-macros\.html$',
    # Directories under spec_tests/ that are skipped entirely.
    # NOTE(review): presumably vendored or submodule content maintained in
    # other repositories — confirm.
    'spec_tests/hed-examples',
    'spec_tests/hed-schemas',
    'spec_tests/hed-tests',
]
# Exclude specific URLs from checking (by regex)
# Conventions used below:
# - URL patterns are anchored with ^ so they only match at the start of the URL.
# - `([^/]+\.)?` before a domain matches the bare domain and any subdomain.
# - Host wildcards use `[^/]` instead of `.` so they cannot run past the host
#   into the path and exclude unrelated URLs.
exclude = [
    # Local/internal URLs (http and https)
    '^https?://127\.0\.0\.',
    '^https?://localhost',
    '^file://',
    # ScienceDirect (require authentication/cookies)
    '^https://www\.sciencedirect\.com/science/article/pii/S1053811921010387',
    '^https://www\.sciencedirect\.com/science/article/pii/S0010945221001106',
    '^https://www\.sciencedirect\.com/science/article/pii/S1388245717309069',
    # Springer (503 errors but links work in browsers)
    '^https?://link\.springer\.com/',
    # INCF, bare domain and all subdomains (certificate/network issues)
    '^https?://([^/]+\.)?incf\.org/',
    # DOI links that return 403 but work in browsers
    '^https?://doi\.org/10\.1111/epi\.18113',
    # NPM (blocks automated requests)
    '^https?://([^/]+\.)?npmjs\.com/package/hed-validator',
    # MathWorks (blocks automated requests)
    '^https?://([^/]+\.)?mathworks\.com/',
    # Brain Meeting poster links (expired/removed)
    '^https?://brainmeeting[^/]*\.ipostersessions\.com/',
    '^https?://globalbrainconsortium\.org/documents/GBC_March-2023_Agenda_Annual_Meeting\.pdf',
    # CANCTA network, bare domain and all subdomains (authentication/access issues)
    '^https?://([^/]+\.)?cancta\.net/',
    # GitHub discussions (programmatic access blocked)
    '^https?://github\.com/hed-standard/hed-python/discussions',
    # HED tools services (programmatic access blocked)
    '^https?://hedtools\.org/hed/services_submit',
    # Internal anchor links (false positives from lychee).
    # Deliberately unanchored: matches "_anchor" or "-anchor" anywhere in the URL.
    '[_-]anchor',
]