Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 54 additions & 3 deletions src/lib/command-framework/apify-command.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/* eslint-disable max-classes-per-file */

import process from 'node:process';
import type { parseArgs, ParseArgsConfig, ParseArgsOptionDescriptor } from 'node:util';

import type { Awaitable } from '@crawlee/types';
Expand Down Expand Up @@ -144,6 +145,39 @@ const jsonFlagDefinition = {
multiple: false,
} as const satisfies ParseArgsOptionDescriptor;

const userAgentFlagDefinition = {
type: 'string',
multiple: false,
} as const satisfies ParseArgsOptionDescriptor;

export const USER_AGENT_FLAG_NAME = 'user-agent';
export const USER_AGENT_ENV_VAR = 'APIFY_CLI_USER_AGENT';
export const USER_AGENT_MAX_LENGTH = 256;
// Scope the caller-id flag to the public `apify` entrypoint. The `actor` entrypoint
// runs inside Actor Docker images where caller-identification is not meaningful.
const USER_AGENT_SUPPORTED_ENTRYPOINTS = new Set(['apify']);

function sanitizeUserAgentValue(value: string | undefined): string | undefined {
if (typeof value !== 'string') {
return undefined;
}
// Strip ASCII control chars (0x00-0x1F and 0x7F) to keep telemetry payloads clean.
// eslint-disable-next-line no-control-regex
const stripped = value.replace(/[\u0000-\u001f\u007f]/g, '');
const trimmed = stripped.trim();
if (!trimmed) {
return undefined;
}
return trimmed.length > USER_AGENT_MAX_LENGTH ? trimmed.slice(0, USER_AGENT_MAX_LENGTH) : trimmed;
}

export function resolveUserAgentForTelemetry(
flagValue: string | undefined,
envValue: string | undefined,
): string | undefined {
return sanitizeUserAgentValue(flagValue) ?? sanitizeUserAgentValue(envValue);
}

export const commandRegistry = new Map<string, typeof BuiltApifyCommand>();

type ParseResult = ReturnType<typeof parseArgs<ReturnType<ApifyCommand['_buildParseArgsOption']>>>;
Expand Down Expand Up @@ -281,6 +315,17 @@ export abstract class ApifyCommand<T extends typeof BuiltApifyCommand = typeof B
this.ctor.printHelp();
}

if (USER_AGENT_SUPPORTED_ENTRYPOINTS.has(this.entrypoint)) {
const rawUserAgentFlag = rawFlags[USER_AGENT_FLAG_NAME];
const resolvedUserAgent = resolveUserAgentForTelemetry(
typeof rawUserAgentFlag === 'string' ? rawUserAgentFlag : undefined,
process.env[USER_AGENT_ENV_VAR],
);
if (resolvedUserAgent) {
this.telemetryData.userAgent = resolvedUserAgent;
}
}

// Cheating a bit here with the types, but its fine

this.args = {} as any;
Expand Down Expand Up @@ -675,14 +720,20 @@ export abstract class ApifyCommand<T extends typeof BuiltApifyCommand = typeof B
}

protected _buildParseArgsOption() {
const baseOptions: Record<string, ParseArgsOptionDescriptor> = {
help: helpFlagDefinition,
};

if (USER_AGENT_SUPPORTED_ENTRYPOINTS.has(this.entrypoint)) {
baseOptions[USER_AGENT_FLAG_NAME] = userAgentFlagDefinition;
}

const object = {
allowNegative: true,
allowPositionals: true,
strict: true,
tokens: true,
options: {
help: helpFlagDefinition,
} as {
options: baseOptions as {
help: typeof helpFlagDefinition;
json: typeof jsonFlagDefinition;
[k: string]: ParseArgsOptionDescriptor;
Expand Down
1 change: 1 addition & 0 deletions src/lib/hooks/telemetry/trackEvent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ export interface TrackEventMap {
exitCode?: number;
durationMs?: number;
aiAgent?: string;
userAgent?: string;
isCi?: boolean;
ciProvider?: string;
isInteractive?: boolean;
Expand Down
29 changes: 29 additions & 0 deletions test/e2e/commands/user-agent.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import { runCli } from '../__helpers__/run-cli.js';

describe('[e2e] --user-agent flag', () => {
it('accepts --user-agent on any command without erroring', async () => {
const result = await runCli('apify', ['help', '--user-agent', 'test-caller/1.0.0']);
expect(result.exitCode, `stderr: ${result.stderr}`).toBe(0);
});

it('accepts APIFY_CLI_USER_AGENT env var on any command without erroring', async () => {
const result = await runCli('apify', ['help'], {
env: { APIFY_CLI_USER_AGENT: 'test-caller/env-1.0.0' },
});
expect(result.exitCode, `stderr: ${result.stderr}`).toBe(0);
});

it('rejects --user-agent without a value (string flag)', async () => {
// Passing --user-agent with no value should surface a parseArgs error,
// since the flag is declared as a string type.
const result = await runCli('apify', ['help', '--user-agent']);
expect(result.exitCode).not.toBe(0);
});

it('rejects --user-agent under the actor entrypoint', async () => {
// The flag is scoped to the public apify entrypoint only — the actor
// entrypoint runs inside Actor Docker images where caller-id is meaningless.
const result = await runCli('actor', ['help', '--user-agent', 'foo']);
expect(result.exitCode).not.toBe(0);
});
});
224 changes: 224 additions & 0 deletions test/local/lib/command-framework/user-agent.test.ts
Comment thread
patrikbraborec marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
/* eslint-disable max-classes-per-file */
import { parseArgs } from 'node:util';

import {
ApifyCommand,
type BuiltApifyCommand as _BuiltApifyCommand,
resolveUserAgentForTelemetry,
USER_AGENT_ENV_VAR,
USER_AGENT_FLAG_NAME,
USER_AGENT_MAX_LENGTH,
} from '../../../../src/lib/command-framework/apify-command.js';
import { Flags } from '../../../../src/lib/command-framework/flags.js';

const BuiltApifyCommand = ApifyCommand as typeof _BuiltApifyCommand;

class NoOpCommand extends BuiltApifyCommand {
static override name = 'noop' as const;
static override description = 'Does nothing.';
async run() {
// no-op
}
}

class CommandWithFlags extends BuiltApifyCommand {
static override name = 'with-flags' as const;
static override description = 'Has flags.';
static override flags = {
foo: Flags.string({ description: 'foo flag' }),
};

async run() {
// no-op
}
}

describe('resolveUserAgentForTelemetry()', () => {
test('flag value wins over env value', () => {
expect(resolveUserAgentForTelemetry('flag', 'env')).toBe('flag');
});

test('env value used as fallback', () => {
expect(resolveUserAgentForTelemetry(undefined, 'env')).toBe('env');
});

test('undefined when neither set', () => {
expect(resolveUserAgentForTelemetry(undefined, undefined)).toBeUndefined();
});

test('empty/whitespace flag falls through to env', () => {
expect(resolveUserAgentForTelemetry(' ', 'env')).toBe('env');
expect(resolveUserAgentForTelemetry('', 'env')).toBe('env');
});

test('empty/whitespace env returns undefined when flag also empty', () => {
expect(resolveUserAgentForTelemetry('', '')).toBeUndefined();
expect(resolveUserAgentForTelemetry(undefined, ' ')).toBeUndefined();
});

test('trims surrounding whitespace on returned value', () => {
expect(resolveUserAgentForTelemetry(' foo ', undefined)).toBe('foo');
expect(resolveUserAgentForTelemetry(undefined, ' bar ')).toBe('bar');
});

test('strips ASCII control characters silently', () => {
expect(resolveUserAgentForTelemetry('plugin\u0000/1.0\u0007', undefined)).toBe('plugin/1.0');
expect(resolveUserAgentForTelemetry('line\nbreak\rthing\ttab', undefined)).toBe('linebreakthingtab');
expect(resolveUserAgentForTelemetry('\u007f\u0001only-control\u001f', undefined)).toBe('only-control');
});

test('returns undefined when input is only control characters', () => {
expect(resolveUserAgentForTelemetry('\u0000\u0001\u001f', undefined)).toBeUndefined();
});

test('caps length at USER_AGENT_MAX_LENGTH', () => {
const long = 'x'.repeat(USER_AGENT_MAX_LENGTH + 100);
const resolved = resolveUserAgentForTelemetry(long, undefined);
expect(resolved).toBeDefined();
expect(resolved!.length).toBe(USER_AGENT_MAX_LENGTH);
});

test('does not touch values at or below cap', () => {
const exact = 'y'.repeat(USER_AGENT_MAX_LENGTH);
expect(resolveUserAgentForTelemetry(exact, undefined)).toBe(exact);
});
});

describe('--user-agent flag registration', () => {
test('is parseable on commands with no declared flags', () => {
const instance = new NoOpCommand('apify', NoOpCommand.name, NoOpCommand.name);
// eslint-disable-next-line dot-notation
const parserOptions = instance['_buildParseArgsOption']();

expect(parserOptions.options).toHaveProperty(USER_AGENT_FLAG_NAME);

const parsed = parseArgs({
...parserOptions,
args: ['--user-agent', 'apify-agent-skills/ultimate-scraper-1.3.0'],
});

expect(parsed.values[USER_AGENT_FLAG_NAME]).toBe('apify-agent-skills/ultimate-scraper-1.3.0');
});

test('is parseable alongside command-specific flags', () => {
const instance = new CommandWithFlags('apify', CommandWithFlags.name, CommandWithFlags.name);
// eslint-disable-next-line dot-notation
const parserOptions = instance['_buildParseArgsOption']();

const parsed = parseArgs({
...parserOptions,
args: ['--foo', 'hello', '--user-agent', 'my-plugin/1.0.0'],
});

// command-declared string flags use multiple:true, so node returns an array.
expect(parsed.values.foo).toEqual(['hello']);
expect(parsed.values[USER_AGENT_FLAG_NAME]).toBe('my-plugin/1.0.0');
});

test('is optional — parser does not fail when omitted', () => {
const instance = new NoOpCommand('apify', NoOpCommand.name, NoOpCommand.name);
// eslint-disable-next-line dot-notation
const parserOptions = instance['_buildParseArgsOption']();

const parsed = parseArgs({
...parserOptions,
args: [],
});

expect(parsed.values[USER_AGENT_FLAG_NAME]).toBeUndefined();
});

test('treats --user-agent after `--` separator as a positional, not a flag', () => {
const instance = new NoOpCommand('apify', NoOpCommand.name, NoOpCommand.name);
// eslint-disable-next-line dot-notation
const parserOptions = instance['_buildParseArgsOption']();

const parsed = parseArgs({
...parserOptions,
args: ['--', '--user-agent', 'forwarded-value'],
});

expect(parsed.values[USER_AGENT_FLAG_NAME]).toBeUndefined();
expect(parsed.positionals).toEqual(['--user-agent', 'forwarded-value']);
});

test('is NOT registered under non-apify entrypoints (e.g. actor)', () => {
const instance = new NoOpCommand('actor', NoOpCommand.name, NoOpCommand.name);
// eslint-disable-next-line dot-notation
const parserOptions = instance['_buildParseArgsOption']();

expect(parserOptions.options).not.toHaveProperty(USER_AGENT_FLAG_NAME);

// parseArgs with strict:true should now reject --user-agent on this entrypoint.
expect(() =>
parseArgs({
...parserOptions,
args: ['--user-agent', 'foo'],
}),
).toThrow();
});
});

describe('--user-agent end-to-end telemetry wiring', () => {
const originalEnv = process.env[USER_AGENT_ENV_VAR];

afterEach(() => {
if (originalEnv === undefined) {
delete process.env[USER_AGENT_ENV_VAR];
} else {
process.env[USER_AGENT_ENV_VAR] = originalEnv;
}
});

async function runWith(
args: string[],
entrypoint = 'apify',
): Promise<_BuiltApifyCommand & { telemetryData: Record<string, unknown> }> {
const instance = new NoOpCommand(entrypoint, NoOpCommand.name, NoOpCommand.name);
// eslint-disable-next-line dot-notation
instance['skipTelemetry'] = true;
// eslint-disable-next-line dot-notation
const parserOptions = instance['_buildParseArgsOption']();
const parsed = parseArgs({ ...parserOptions, args });
// eslint-disable-next-line dot-notation
await instance['_run'](parsed);
return instance as unknown as _BuiltApifyCommand & { telemetryData: Record<string, unknown> };
}

test('populates telemetryData.userAgent when flag passed', async () => {
delete process.env[USER_AGENT_ENV_VAR];
const instance = await runWith(['--user-agent', 'skills/scraper-1.0']);
expect(instance.telemetryData.userAgent).toBe('skills/scraper-1.0');
});

test('falls back to APIFY_CLI_USER_AGENT env var', async () => {
process.env[USER_AGENT_ENV_VAR] = 'env-caller/2.0';
const instance = await runWith([]);
expect(instance.telemetryData.userAgent).toBe('env-caller/2.0');
});

test('flag overrides env var', async () => {
process.env[USER_AGENT_ENV_VAR] = 'env-caller/2.0';
const instance = await runWith(['--user-agent', 'flag-caller/3.0']);
expect(instance.telemetryData.userAgent).toBe('flag-caller/3.0');
});

test('leaves userAgent unset when neither flag nor env var provided', async () => {
delete process.env[USER_AGENT_ENV_VAR];
const instance = await runWith([]);
expect(instance.telemetryData.userAgent).toBeUndefined();
});

test('sanitizes oversized flag input before emitting', async () => {
delete process.env[USER_AGENT_ENV_VAR];
const bigValue = 'a'.repeat(USER_AGENT_MAX_LENGTH + 50);
const instance = await runWith(['--user-agent', bigValue]);
expect((instance.telemetryData.userAgent as string).length).toBe(USER_AGENT_MAX_LENGTH);
});

test('ignores env var entirely when running under actor entrypoint', async () => {
process.env[USER_AGENT_ENV_VAR] = 'env-caller/2.0';
const instance = await runWith([], 'actor');
expect(instance.telemetryData.userAgent).toBeUndefined();
});
});