Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion api/_common/http.js
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,9 @@ const wrapNetworkError = (error) => {
return error;
};

const UA = 'web-check/1.0 (https://web-check.xyz)';
export const UA =
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) ' +
'Chrome/120.0.0.0 Safari/537.36 (compatible; web-check/1.0; +https://web-check.xyz)';

const send = async (method, url, body, opts = {}) => {
const finalUrl = appendParams(url, opts.params);
Expand Down
16 changes: 8 additions & 8 deletions api/archives.js
Original file line number Diff line number Diff line change
Expand Up @@ -47,32 +47,32 @@ const getScanFrequency = (firstScan, lastScan, totalScans, changeCount) => {
};

const wayBackHandler = async (url) => {
const cdxUrl = `https://web.archive.org/cdx/search/cdx?url=${url}&output=json&fl=timestamp,statuscode,digest,length,offset`;
// collapse=timestamp:8 returns one row per archived day, slashing payloads
// (Wikipedia: 25MB/373k rows -> 428KB/6k rows) without losing first/last/change counts
const cdxUrl =
`https://web.archive.org/cdx/search/cdx?url=${encodeURIComponent(url)}` +
`&output=json&fl=timestamp,statuscode,digest,length&collapse=timestamp:8`;

try {
const { data } = await httpGet(cdxUrl);

// Check there's data
if (!data || !Array.isArray(data) || data.length <= 1) {
return { skipped: 'Site has never before been archived via the Wayback Machine' };
}

// Remove the header row
data.shift();

// Process and return the results
const firstScan = convertTimestampToDate(data[0][0]);
const lastScan = convertTimestampToDate(data[data.length - 1][0]);
const totalScans = data.length;
const daysArchived = data.length;
const changeCount = countPageChanges(data);
return {
firstScan,
lastScan,
totalScans,
daysArchived,
changeCount,
averagePageSize: getAveragePageSize(data),
scanFrequency: getScanFrequency(firstScan, lastScan, totalScans, changeCount),
scans: data,
scanFrequency: getScanFrequency(firstScan, lastScan, daysArchived, changeCount),
scanUrl: url,
};
} catch (err) {
Expand Down
4 changes: 2 additions & 2 deletions api/carbon.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import middleware from './_common/middleware.js';
import { UA } from './_common/http.js';
import { createLogger } from './_common/logger.js';

const log = createLogger('carbon');

const TIMEOUT = 8000;
const MAX_BYTES = 10 * 1024 * 1024;
const USER_AGENT = 'Mozilla/5.0 (compatible; WebCheck/2.0; +https://web-check.xyz)';

// Sustainable Web Design model v3 constants, matches websitecarbon.com formula
const KWH_PER_GB = 0.81;
Expand Down Expand Up @@ -33,7 +33,7 @@ const fetchByteCount = async (url) => {
const r = await fetch(url, {
signal: AbortSignal.timeout(TIMEOUT),
redirect: 'follow',
headers: { 'user-agent': USER_AGENT, accept: 'text/html,*/*;q=0.1' },
headers: { 'user-agent': UA, accept: 'text/html,*/*;q=0.1' },
});
if (!r.ok) throw new Error(`status ${r.status}`);
if (!r.body) return 0;
Expand Down
1 change: 0 additions & 1 deletion api/dnssec.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ const queryDns = async (domain, type) => {
const res = await httpGet('https://dns.google/resolve', {
params: { name: domain, type },
headers: { Accept: 'application/dns-json' },
timeout: 5000,
});
return res.data;
};
Expand Down
6 changes: 0 additions & 6 deletions api/hsts.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,18 +22,12 @@ const evaluate = (header) => {
return verdict('Site is compatible with the HSTS preload list!', true, header);
};

const REQUEST_TIMEOUT = 5000;

const hstsHandler = async (url) =>
new Promise((resolve) => {
const req = https.request(url, (res) => {
resolve(evaluate(res.headers['strict-transport-security']));
res.resume();
});
req.setTimeout(REQUEST_TIMEOUT, () => {
req.destroy();
resolve({ error: 'HSTS check timed out' });
});
req.on('error', (e) => resolve({ error: `HSTS check failed: ${e.message}` }));
req.end();
});
Expand Down
2 changes: 1 addition & 1 deletion api/quality.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ const qualityHandler = async (url) => {
const endpoint =
`https://www.googleapis.com/pagespeedonline/v5/runPagespeed?` +
`url=${encodeURIComponent(url)}&category=PERFORMANCE&category=ACCESSIBILITY` +
`&category=BEST_PRACTICES&category=SEO&category=PWA&strategy=mobile` +
`&category=BEST_PRACTICES&category=SEO&strategy=mobile` +
`&key=${auth.value}`;

let data;
Expand Down
5 changes: 1 addition & 4 deletions api/rank.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,7 @@ const rankHandler = async (url) => {
? { auth: { username: TRANCO_USERNAME, password: TRANCO_API_KEY } }
: {};
try {
const response = await httpGet(`https://tranco-list.eu/api/ranks/domain/${domain}`, {
timeout: 5000,
...auth,
});
const response = await httpGet(`https://tranco-list.eu/api/ranks/domain/${domain}`, auth);
if (!response.data?.ranks?.length) {
return {
skipped: `${domain} isn't ranked in the top 1 million sites yet`,
Expand Down
4 changes: 2 additions & 2 deletions api/redirects.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import middleware from './_common/middleware.js';
import { UA } from './_common/http.js';
import { upstreamError } from './_common/upstream.js';

const MAX_REDIRECTS = 12;
const TIMEOUT_MS = 10000;
const USER_AGENT = 'Mozilla/5.0 (compatible; WebCheck/2.0; +https://web-check.xyz)';

// Walks the redirect chain manually, recording each Location header as got did
const redirectsHandler = async (url) => {
Expand All @@ -14,7 +14,7 @@ const redirectsHandler = async (url) => {
const response = await fetch(current, {
redirect: 'manual',
signal: AbortSignal.timeout(TIMEOUT_MS),
headers: { 'user-agent': USER_AGENT },
headers: { 'user-agent': UA },
});
if (response.status < 300 || response.status >= 400) {
if (response.status >= 400) {
Expand Down
3 changes: 2 additions & 1 deletion api/robots-txt.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ const robotsHandler = async (url) => {
const parsed = parseRobotsTxt(res.data || '');
return parsed.robots.length ? parsed : { skipped: 'No robots.txt rules found for this host' };
} catch (error) {
if (error.response?.status === 404) {
const status = error.response?.status;
if (status >= 400 && status < 500) {
return { skipped: 'No robots.txt file present on this host' };
}
return upstreamError(error, 'robots.txt fetch');
Expand Down
4 changes: 1 addition & 3 deletions api/shodan.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,7 @@ const shodanHandler = async (url) => {
if (auth.skipped) return auth;
const { hostname } = parseTarget(url);
try {
const res = await httpGet(`https://api.shodan.io/shodan/host/${hostname}?key=${auth.value}`, {
timeout: 8000,
});
const res = await httpGet(`https://api.shodan.io/shodan/host/${hostname}?key=${auth.value}`);
return res.data;
} catch (error) {
return upstreamError(error, 'Shodan lookup');
Expand Down
6 changes: 1 addition & 5 deletions api/sitemap.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,7 @@ const MAX_DEPTH = 3;
const MAX_CHILD_SITEMAPS = 25;
const MAX_URLS = 5000;

// Browser-ish headers so picky CDNs do not return 406/403 to the default Node UA
const HEADERS = {
'user-agent': 'Mozilla/5.0 (compatible; web-check-bot/1.0; +https://web-check.xyz)',
accept: 'application/xml, text/xml, application/rss+xml, */*;q=0.1',
};
const HEADERS = { accept: 'application/xml, text/xml, application/rss+xml, */*;q=0.1' };

// Reduce a target URL to its origin so child paths resolve cleanly
const toOrigin = (url) => {
Expand Down
3 changes: 2 additions & 1 deletion api/status.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import https from 'https';
import { performance, PerformanceObserver } from 'perf_hooks';
import middleware from './_common/middleware.js';
import { UA } from './_common/http.js';

const statusHandler = async (url) => {
if (!url) {
Expand All @@ -23,7 +24,7 @@ const statusHandler = async (url) => {
try {
startTime = performance.now();
const response = await new Promise((resolve, reject) => {
const req = https.get(url, (res) => {
const req = https.get(url, { headers: { 'user-agent': UA } }, (res) => {
let data = '';
responseCode = res.statusCode;
res.on('data', (chunk) => {
Expand Down
19 changes: 9 additions & 10 deletions api/tls-labs.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,23 @@ import { upstreamError } from './_common/upstream.js';

const SSL_LABS = 'https://api.ssllabs.com/api/v3/analyze';

// Pull a cached SSL Labs report; skip if no fresh cache available
// Return cached report if ready, pending status while a scan is running, else skip
const tlsLabsHandler = async (url) => {
const { hostname } = parseTarget(url);
try {
const res = await httpGet(SSL_LABS, {
params: { host: hostname, fromCache: 'on', maxAge: 24, all: 'done' },
timeout: 8000,
params: { host: hostname, fromCache: 'on', maxAge: 168, all: 'done' },
headers: { 'User-Agent': 'web-check (https://web-check.xyz)' },
});
const data = res.data;
if (!data || data.status !== 'READY' || !data.endpoints?.length) {
return {
skipped:
'No cached SSL Labs report for this host. ' +
'Run a fresh scan at https://www.ssllabs.com/ssltest/',
};
if (data?.status === 'READY' && data.endpoints?.length) return data;
if (data?.status === 'DNS' || data?.status === 'IN_PROGRESS') {
return { pending: true };
}
return data;
if (data?.status === 'ERROR') {
return { error: `SSL Labs: ${data.statusMessage || 'Assessment failed'}` };
}
return { skipped: 'No SSL Labs report available for this host' };
} catch (error) {
return upstreamError(error, 'SSL Labs lookup');
}
Expand Down
10 changes: 9 additions & 1 deletion api/txt-records.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,17 @@ import dns from 'dns/promises';
import middleware from './_common/middleware.js';
import { parseTarget } from './_common/parse-target.js';

const NO_RECORDS = new Set(['ENODATA', 'ENOTFOUND', 'NXDOMAIN']);

const txtRecordHandler = async (url) => {
const { hostname } = parseTarget(url);
const txtRecords = await dns.resolveTxt(hostname);
let txtRecords;
try {
txtRecords = await dns.resolveTxt(hostname);
} catch (error) {
if (NO_RECORDS.has(error.code)) return { skipped: 'No TXT records for this host' };
throw error;
}
// Join chunks (DNS splits long records at 255 bytes), then key=value
const result = {};
for (const chunks of txtRecords) {
Expand Down
1 change: 0 additions & 1 deletion src/client/analysis/rules/quality.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ const LABELS: Record<string, string> = {
accessibility: 'Accessibility',
'best-practices': 'Best Practices',
seo: 'SEO',
pwa: 'PWA',
};

// Convert a 0..1 lighthouse score to a severity bucket
Expand Down
6 changes: 4 additions & 2 deletions src/client/analysis/rules/threats.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@ const threats: Analyzer = (d) => {
if (Array.isArray(d.urlHaus?.urls) && d.urlHaus.urls.length) {
out.push({ severity: 'critical', title: 'Listed on URLhaus malware feed' });
}
const phishUrl = d.phishTank?.url0?.in_database;
if (phishUrl === 'true' || phishUrl === true) {
const phish = d.phishTank?.url0;
const inDb = phish?.in_database === 'true' || phish?.in_database === true;
const valid = phish?.valid === 'true' || phish?.valid === true;
if (inDb && valid) {
out.push({ severity: 'critical', title: 'Listed on PhishTank' });
}
if (d.cloudmersive?.CleanResult === false) {
Expand Down
8 changes: 2 additions & 6 deletions src/client/components/Results/Archives.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,10 @@ const ArchivesCard = (props: { data: any; title: string; actionButtons: any }):
<Card heading={props.title} actionButtons={props.actionButtons}>
<Row lbl="First Scan" val={data.firstScan} />
<Row lbl="Last Scan" val={data.lastScan} />
<Row lbl="Total Scans" val={data.totalScans} />
<Row lbl="Days Archived" val={data.daysArchived} />
<Row lbl="Change Count" val={data.changeCount} />
<Row lbl="Avg Size" val={`${data.averagePageSize} bytes`} />
{data.scanFrequency?.scansPerDay > 1 ? (
<Row lbl="Avg Scans Per Day" val={data.scanFrequency.scansPerDay} />
) : (
<Row lbl="Avg Days between Scans" val={data.scanFrequency.daysBetweenScans} />
)}
<Row lbl="Avg Days between Archives" val={data.scanFrequency.daysBetweenScans} />

<Note>
View historical versions of this page{' '}
Expand Down
1 change: 1 addition & 0 deletions src/client/components/misc/AdvisoryPanel.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ const META: Record<Severity, SevMeta> = {
const Wrapper = styled(Card)`
margin: 0 auto;
width: 95vw;
max-height: 100%;
h2 {
margin: 0 0 0.75rem 0;
}
Expand Down
Loading