diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 701415c0..530661dd 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -284,6 +284,82 @@ let mut cli = pdsh_cli.to_bssh_cli(); - Unknown pdsh options produce helpful error messages - Normal bssh operation is completely unaffected by pdsh compat code +### 1.5 Hostlist Expression Support (`hostlist/*`) + +**Module Structure (Added 2025-12-17, Issue #98):** +- `hostlist/mod.rs` - Module exports and comma-separated pattern handling (130 lines) +- `hostlist/parser.rs` - Range expression parser (570 lines) +- `hostlist/expander.rs` - Range expansion and cartesian product (270 lines) +- `hostlist/error.rs` - Error types with thiserror (80 lines) + +**Design Decisions:** +- pdsh-compatible hostlist expression syntax +- Zero-cost abstraction for non-range patterns (pass-through) +- Efficient cartesian product expansion for multiple ranges +- Distinguishes hostlist expressions from glob patterns + +**Hostlist Expression Syntax:** +``` +hostlist = host_term (',' host_term)* +host_term = prefix range_expr suffix +range_expr = '[' range_list ']' +range_list = range_item (',' range_item)* +range_item = NUMBER | NUMBER '-' NUMBER +prefix = STRING (any characters before '[') +suffix = STRING (any characters after ']', may include further range expressions) +``` + +**Supported Features:** +- Simple range: `node[1-5]` -> `node1, node2, node3, node4, node5` +- Zero-padded: `node[01-05]` -> `node01, node02, node03, node04, node05` +- Comma-separated: `node[1,3,5]` -> `node1, node3, node5` +- Mixed: `node[1-3,7,9-10]` -> 6 hosts +- Cartesian product: `rack[1-2]-node[1-3]` -> 6 hosts +- With domain: `web[1-3].example.com` -> 3 hosts +- With user/port: `admin@db[01-03]:5432` -> 3 hosts with user and port +- File input: `^/path/to/file` -> read hosts from file + +**Integration Points:** +- `-H` option in native bssh mode (all patterns automatically expanded) +- `-w` option in pdsh compatibility mode +- `--filter` option (supports both glob and 
hostlist patterns) +- `--exclude` option (supports both glob and hostlist patterns) +- pdsh query mode (`-q`) with full expansion support + +**Pattern Detection Heuristics:** +```rust +// Distinguishes hostlist expressions from glob patterns +// Hostlist: [1-5], [01-05], [1,2,3], [1-3,5-7] (numeric content) +// Glob: [abc], [a-z], [!xyz] (alphabetic content) + +fn is_hostlist_expression(pattern: &str) -> bool { + // Check if brackets contain numeric ranges + // Numeric: 1-5, 01-05, 1,2,3 + // Non-numeric (glob): abc, a-z, !xyz +} +``` + +**Safety Limits:** +- Maximum expansion size: 100,000 hosts (prevents DoS) +- Validates range direction (start <= end) +- Error on empty brackets, unclosed brackets, nested brackets +- IPv6 literal bracket disambiguation + +**Data Flow:** +``` +Input: "admin@web[1-3].example.com:22" + ↓ + Parse user prefix: "admin@" + ↓ + Parse hostname with range: "web[1-3].example.com" + ↓ + Expand range: ["web1.example.com", "web2.example.com", "web3.example.com"] + ↓ + Parse port suffix: ":22" + ↓ +Output: ["admin@web1.example.com:22", "admin@web2.example.com:22", "admin@web3.example.com:22"] +``` + ### 2. 
Configuration Management (`config/*`) **Module Structure (Refactored 2025-10-17):** diff --git a/README.md b/README.md index c9a534d9..0491b7d5 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,7 @@ A high-performance SSH client with **SSH-compatible syntax** for both single-hos - **Port Forwarding**: Full support for local (-L), remote (-R), and dynamic (-D) SSH port forwarding - **Jump Host Support**: Connect through bastion hosts using OpenSSH ProxyJump syntax (`-J`) - **Parallel Execution**: Execute commands across multiple nodes simultaneously +- **Hostlist Expressions**: pdsh-style range expansion (`node[1-5]`, `rack[1-2]-node[1-3]`) for compact host specification - **Fail-Fast Mode**: Stop immediately on first failure with `-k` flag (pdsh compatible) - **Interactive Terminal UI (TUI)**: Real-time monitoring with 4 view modes (Summary/Detail/Split/Diff) for multi-node operations - **Cluster Management**: Define and manage node clusters via configuration files @@ -179,14 +180,24 @@ bssh -H "user1@host1.com,user2@host2.com:2222" "uptime" # Using cluster from config bssh -C production "df -h" +# Hostlist expressions (pdsh-style range expansion) +bssh -H "node[1-5]" "uptime" # node1, node2, node3, node4, node5 +bssh -H "node[01-05]" "df -h" # Zero-padded: node01, node02, ... 
+bssh -H "node[1,3,5]" "ps aux" # Specific values: node1, node3, node5 +bssh -H "rack[1-2]-node[1-3]" "uptime" # Cartesian product: 6 hosts +bssh -H "web[1-3].example.com" "nginx -v" # With domain suffix +bssh -H "admin@db[01-03]:5432" "psql --version" # With user and port +bssh -H "^/etc/hosts.cluster" "uptime" # Read hosts from file + # Filter specific hosts with pattern matching bssh -H "web1,web2,db1,db2" -f "web*" "systemctl status nginx" bssh -C production -f "db*" "pg_dump --version" +bssh -H "node[1-10]" -f "node[1-5]" "uptime" # Filter with hostlist expression # Exclude specific hosts from execution bssh -H "node1,node2,node3" --exclude "node2" "uptime" bssh -C production --exclude "db*" "systemctl restart nginx" -bssh -C production --exclude "web1,web2" "apt update" +bssh -H "node[1-10]" --exclude "node[3-5]" "uptime" # Exclude with hostlist expression # With custom SSH key bssh -C staging -i ~/.ssh/custom_key "systemctl status nginx" diff --git a/docs/man/bssh.1 b/docs/man/bssh.1 index 94ca1c3e..3683898b 100644 --- a/docs/man/bssh.1 +++ b/docs/man/bssh.1 @@ -113,7 +113,29 @@ Example: -D 1080 (SOCKS5 proxy on localhost:1080), -D *:1080/4 (SOCKS4 on all in .TP .BR \-H ", " \-\-hosts " " \fIHOSTS\fR Comma-separated list of hosts in [user@]hostname[:port] format. -Example: user1@host1:2222,user2@host2 +Supports pdsh-style hostlist expressions for range expansion. +.RS +.PP +Simple host list: +.IP \[bu] 2 +-H "user1@host1:2222,user2@host2" +.PP +Hostlist expressions (range expansion): +.IP \[bu] 2 +-H "node[1-5]" \[->] node1, node2, node3, node4, node5 +.IP \[bu] 2 +-H "node[01-05]" \[->] node01, node02, ... (zero-padded) +.IP \[bu] 2 +-H "node[1,3,5]" \[->] node1, node3, node5 (specific values) +.IP \[bu] 2 +-H "rack[1-2]-node[1-3]" \[->] 6 hosts (cartesian product) +.IP \[bu] 2 +-H "web[1-3].example.com" \[->] web1.example.com, web2.example.com, ... 
+.IP \[bu] 2 +-H "admin@web[1-3]:22" \[->] expands with user and port preserved +.IP \[bu] 2 +-H "^/path/to/hostfile" \[->] read hosts from file +.RE .TP .BR \-C ", " \-\-cluster " " \fICLUSTER\fR @@ -166,29 +188,47 @@ Password is never logged or printed in any output .TP .BR \-f ", " \-\-filter " " \fIPATTERN\fR -Filter hosts by pattern (supports wildcards like 'web*'). +Filter hosts by pattern. Supports both wildcards and hostlist expressions. Use with -H or -C to execute on a subset of hosts. -Example: -f "web*" matches web01, web02, etc. +.RS +.PP +Examples: +.IP \[bu] 2 +-f "web*" \[->] matches web01, web02, etc. (glob pattern) +.IP \[bu] 2 +-f "node[1-5]" \[->] matches node1 through node5 (hostlist expression) +.IP \[bu] 2 +-f "node[1,3,5]" \[->] matches node1, node3, node5 (specific values) +.RE .TP .BR \-\-exclude " " \fIHOSTS\fR Exclude hosts from target list (comma-separated). -Supports wildcard patterns: '*' (any chars), '?' (single char), '[abc]' (char set). -Patterns with wildcards use glob matching; plain patterns use substring matching. +Supports wildcards, glob patterns, and hostlist expressions. Applied after --filter option. 
.RS .PP -Examples: +Glob patterns: +.IP \[bu] 2 +--exclude "db*" \[->] exclude hosts starting with 'db' .IP \[bu] 2 ---exclude "node2" - Exclude single host +--exclude "*-backup" \[->] exclude backup nodes .IP \[bu] 2 ---exclude "web1,web2" - Exclude multiple hosts +--exclude "web[12]" \[->] exclude web1 and web2 (glob character class) +.PP +Hostlist expressions: +.IP \[bu] 2 +--exclude "node[3-5]" \[->] exclude node3, node4, node5 (range) .IP \[bu] 2 ---exclude "db*" - Exclude hosts starting with 'db' +--exclude "node[1,3,5]" \[->] exclude node1, node3, node5 (specific values) .IP \[bu] 2 ---exclude "*-backup" - Exclude backup nodes +--exclude "rack[1-2]-node[1-3]" \[->] exclude 6 hosts (cartesian product) +.PP +Simple patterns: .IP \[bu] 2 ---exclude "web[12]" - Exclude web1 and web2 +--exclude "node2" \[->] exclude single host +.IP \[bu] 2 +--exclude "web1,web2" \[->] exclude multiple hosts .RE .TP @@ -1094,6 +1134,79 @@ Current node's role (main or sub) Note: Backend.AI multi-node clusters use SSH port 2200 by default, which is automatically configured. +.SH HOSTLIST EXPRESSIONS +Hostlist expressions provide a compact way to specify multiple hosts using range notation, +compatible with pdsh syntax. This allows efficient targeting of large numbers of hosts +without listing each one individually. 
+ +.SS Basic Syntax +.TP +.B Simple range +.B node[1-5] +expands to node1, node2, node3, node4, node5 +.TP +.B Zero-padded range +.B node[01-05] +expands to node01, node02, node03, node04, node05 +.TP +.B Comma-separated values +.B node[1,3,5] +expands to node1, node3, node5 +.TP +.B Mixed ranges and values +.B node[1-3,7,9-10] +expands to node1, node2, node3, node7, node9, node10 + +.SS Advanced Patterns +.TP +.B Multiple ranges (cartesian product) +.B rack[1-2]-node[1-3] +expands to rack1-node1, rack1-node2, rack1-node3, rack2-node1, rack2-node2, rack2-node3 +.TP +.B Domain suffix +.B web[1-3].example.com +expands to web1.example.com, web2.example.com, web3.example.com +.TP +.B With user and port +.B admin@server[1-3]:2222 +expands to admin@server1:2222, admin@server2:2222, admin@server3:2222 + +.SS File Input +.TP +.B ^/path/to/hostfile +Reads hosts from file, one per line. Lines starting with # are comments. +Maximum file size: 1MB, maximum lines: 100,000. + +.SS Using with Options +Hostlist expressions can be used with: +.TP +.B -H, --hosts +Specify target hosts: +.B bssh -H "node[1-10]" "uptime" +.TP +.B --filter +Include only matching hosts: +.B bssh -C cluster --filter "web[1-5]" "systemctl status nginx" +.TP +.B --exclude +Exclude matching hosts: +.B bssh -C cluster --exclude "node[1,3,5]" "df -h" + +.SS Examples +.nf +# Execute on 100 nodes +bssh -H "compute[001-100]" "hostname" + +# Target specific racks +bssh -H "rack[1-3]-node[1-8]" "uptime" + +# Use hosts from file +bssh -H "^/etc/cluster/hosts" "date" + +# Combine with exclusions +bssh -H "node[1-20]" --exclude "node[5,10,15]" "ps aux" +.fi + .SH EXAMPLES .SS SSH Compatibility Mode (Single Host) diff --git a/src/app/nodes.rs b/src/app/nodes.rs index 28548c2e..7e77b1da 100644 --- a/src/app/nodes.rs +++ b/src/app/nodes.rs @@ -15,7 +15,7 @@ //! 
Node resolution and filtering functionality use anyhow::{Context, Result}; -use bssh::{cli::Cli, config::Config, node::Node, ssh::SshConfig}; +use bssh::{cli::Cli, config::Config, hostlist, node::Node, ssh::SshConfig}; use glob::Pattern; /// Parse a node string with SSH config integration @@ -129,11 +129,14 @@ pub async fn resolve_nodes( let node = Node::new(effective_hostname, effective_port, effective_user); nodes.push(node); } else if let Some(hosts) = &cli.hosts { - // Parse hosts from CLI + // Parse hosts from CLI with hostlist expression expansion for host_str in hosts { - // Split by comma if a single argument contains multiple hosts - for single_host in host_str.split(',') { - let node = parse_node_with_ssh_config(single_host.trim(), ssh_config)?; + // Expand hostlist expressions (e.g., node[1-5], rack[1-2]-node[1-3]) + let expanded_hosts = hostlist::expander::expand_host_specs(host_str) + .with_context(|| format!("Failed to expand host expression: {host_str}"))?; + + for single_host in expanded_hosts { + let node = parse_node_with_ssh_config(&single_host, ssh_config)?; nodes.push(node); } } @@ -152,7 +155,8 @@ pub async fn resolve_nodes( // Apply host filter if destination is used as a filter pattern if let Some(filter) = cli.get_host_filter() { - nodes = filter_nodes(nodes, filter)?; + // Expand hostlist expressions in filter pattern (e.g., --filter "node[1-5]") + nodes = filter_nodes_with_hostlist(nodes, filter)?; if nodes.is_empty() { anyhow::bail!("No hosts matched the filter pattern: {filter}"); } @@ -161,7 +165,8 @@ pub async fn resolve_nodes( // Apply host exclusion patterns (--exclude option) if let Some(exclude_patterns) = cli.get_exclude_patterns() { let node_count_before = nodes.len(); - nodes = exclude_nodes(nodes, exclude_patterns)?; + // Expand hostlist expressions in exclusion patterns + nodes = exclude_nodes_with_hostlist(nodes, exclude_patterns)?; if nodes.is_empty() { let patterns_str = exclude_patterns.join(", "); anyhow::bail!( @@ 
-344,9 +349,92 @@ pub fn filter_nodes(nodes: Vec, pattern: &str) -> Result> { } } +/// Filter nodes with hostlist expression support +/// +/// If the pattern contains hostlist expressions (e.g., node[1-5]), +/// expands the pattern and filters to matching nodes. +/// Otherwise, falls back to standard glob/exact matching. +pub fn filter_nodes_with_hostlist(nodes: Vec, pattern: &str) -> Result> { + // Security: Basic validation + if pattern.is_empty() { + anyhow::bail!("Filter pattern cannot be empty"); + } + + // Check if this looks like a hostlist expression + if hostlist::is_hostlist_expression(pattern) { + // Expand the hostlist expression + let expanded_patterns = hostlist::expander::expand_host_specs(pattern) + .with_context(|| format!("Failed to expand filter pattern: {pattern}"))?; + + // Create a set of expanded patterns for efficient lookup + let pattern_set: std::collections::HashSet<&str> = + expanded_patterns.iter().map(|s| s.as_str()).collect(); + + // Filter nodes that match any expanded pattern + let filtered: Vec = nodes + .into_iter() + .filter(|node| { + pattern_set.contains(node.host.as_str()) + || pattern_set.contains(node.to_string().as_str()) + }) + .collect(); + + Ok(filtered) + } else { + // Fall back to standard filter_nodes for glob patterns + filter_nodes(nodes, pattern) + } +} + +/// Exclude nodes with hostlist expression support +/// +/// Expands any hostlist expressions in exclusion patterns before matching. 
+pub fn exclude_nodes_with_hostlist(nodes: Vec, patterns: &[String]) -> Result> { + if patterns.is_empty() { + return Ok(nodes); + } + + // Expand all patterns and collect into a set of hostnames to exclude + let mut expanded_patterns = Vec::new(); + let mut glob_patterns = Vec::new(); + + for pattern in patterns { + if hostlist::is_hostlist_expression(pattern) { + // Expand hostlist expression + let expanded = hostlist::expander::expand_host_specs(pattern) + .with_context(|| format!("Failed to expand exclusion pattern: {pattern}"))?; + expanded_patterns.extend(expanded); + } else { + // Keep as a glob pattern for later matching + glob_patterns.push(pattern.clone()); + } + } + + // Create a set of expanded patterns for O(1) lookup + let expanded_set: std::collections::HashSet<&str> = + expanded_patterns.iter().map(|s| s.as_str()).collect(); + + // First filter by expanded hostlist patterns + let mut filtered: Vec = nodes + .into_iter() + .filter(|node| { + !expanded_set.contains(node.host.as_str()) + && !expanded_set.contains(node.to_string().as_str()) + }) + .collect(); + + // Then apply glob patterns using existing exclude_nodes + if !glob_patterns.is_empty() { + filtered = exclude_nodes(filtered, &glob_patterns)?; + } + + Ok(filtered) +} + #[cfg(test)] mod tests { use super::*; + use bssh::hostlist::is_hostlist_expression; fn create_test_nodes() -> Vec { vec![ @@ -584,4 +672,163 @@ mod tests { assert!(result.iter().any(|n| n.host == "web1.example.com")); assert!(result.iter().any(|n| n.host == "web2.example.com")); } + + // ==================== Hostlist Expression Tests ==================== + + #[test] + fn test_is_hostlist_expression_numeric_range() { + // Numeric ranges should be detected as hostlist + assert!(is_hostlist_expression("node[1-5]")); + assert!(is_hostlist_expression("node[01-05]")); + assert!(is_hostlist_expression("node[1,2,3]")); + assert!(is_hostlist_expression("node[1-3,5-7]")); + assert!(is_hostlist_expression("rack[1-2]-node[1-3]")); + } + 
+ #[test] + fn test_is_hostlist_expression_glob_pattern() { + // Glob patterns should NOT be detected as hostlist + assert!(!is_hostlist_expression("web*")); + assert!(!is_hostlist_expression("web[abc]")); + assert!(!is_hostlist_expression("web[a-z]")); + assert!(!is_hostlist_expression("web[!12]")); + assert!(!is_hostlist_expression("simple.host.com")); + } + + #[test] + fn test_filter_nodes_with_hostlist_range() { + let nodes = vec![ + Node::new("node1".to_string(), 22, "admin".to_string()), + Node::new("node2".to_string(), 22, "admin".to_string()), + Node::new("node3".to_string(), 22, "admin".to_string()), + Node::new("node4".to_string(), 22, "admin".to_string()), + Node::new("node5".to_string(), 22, "admin".to_string()), + ]; + + // Filter to nodes 1-3 using hostlist expression + let result = filter_nodes_with_hostlist(nodes, "node[1-3]").unwrap(); + + assert_eq!(result.len(), 3); + assert!(result.iter().any(|n| n.host == "node1")); + assert!(result.iter().any(|n| n.host == "node2")); + assert!(result.iter().any(|n| n.host == "node3")); + assert!(!result.iter().any(|n| n.host == "node4")); + assert!(!result.iter().any(|n| n.host == "node5")); + } + + #[test] + fn test_filter_nodes_with_hostlist_comma_separated() { + let nodes = vec![ + Node::new("node1".to_string(), 22, "admin".to_string()), + Node::new("node2".to_string(), 22, "admin".to_string()), + Node::new("node3".to_string(), 22, "admin".to_string()), + Node::new("node4".to_string(), 22, "admin".to_string()), + Node::new("node5".to_string(), 22, "admin".to_string()), + ]; + + // Filter to specific nodes using hostlist expression + let result = filter_nodes_with_hostlist(nodes, "node[1,3,5]").unwrap(); + + assert_eq!(result.len(), 3); + assert!(result.iter().any(|n| n.host == "node1")); + assert!(result.iter().any(|n| n.host == "node3")); + assert!(result.iter().any(|n| n.host == "node5")); + } + + #[test] + fn test_filter_nodes_with_hostlist_falls_back_to_glob() { + let nodes = create_test_nodes(); + + // 
Glob pattern (not a hostlist) should still work + let result = filter_nodes_with_hostlist(nodes, "web*").unwrap(); + + assert_eq!(result.len(), 2); + assert!(result.iter().all(|n| n.host.starts_with("web"))); + } + + #[test] + fn test_exclude_nodes_with_hostlist_range() { + let nodes = vec![ + Node::new("node1".to_string(), 22, "admin".to_string()), + Node::new("node2".to_string(), 22, "admin".to_string()), + Node::new("node3".to_string(), 22, "admin".to_string()), + Node::new("node4".to_string(), 22, "admin".to_string()), + Node::new("node5".to_string(), 22, "admin".to_string()), + ]; + + // Exclude nodes 2-4 using hostlist expression + let patterns = vec!["node[2-4]".to_string()]; + let result = exclude_nodes_with_hostlist(nodes, &patterns).unwrap(); + + assert_eq!(result.len(), 2); + assert!(result.iter().any(|n| n.host == "node1")); + assert!(result.iter().any(|n| n.host == "node5")); + } + + #[test] + fn test_exclude_nodes_with_hostlist_mixed_patterns() { + let nodes = vec![ + Node::new("node1".to_string(), 22, "admin".to_string()), + Node::new("node2".to_string(), 22, "admin".to_string()), + Node::new("node3".to_string(), 22, "admin".to_string()), + Node::new("web1".to_string(), 22, "admin".to_string()), + Node::new("web2".to_string(), 22, "admin".to_string()), + ]; + + // Mix hostlist and glob patterns + let patterns = vec!["node[1-2]".to_string(), "web*".to_string()]; + let result = exclude_nodes_with_hostlist(nodes, &patterns).unwrap(); + + assert_eq!(result.len(), 1); + assert_eq!(result[0].host, "node3"); + } + + #[test] + fn test_exclude_nodes_with_hostlist_falls_back_to_glob() { + let nodes = create_test_nodes(); + + // Pure glob pattern (not a hostlist) should still work + let patterns = vec!["db*".to_string()]; + let result = exclude_nodes_with_hostlist(nodes, &patterns).unwrap(); + + assert_eq!(result.len(), 3); + assert!(!result.iter().any(|n| n.host.starts_with("db"))); + } + + #[test] + fn test_filter_nodes_with_hostlist_zero_padded() { + let 
nodes = vec![ + Node::new("node01".to_string(), 22, "admin".to_string()), + Node::new("node02".to_string(), 22, "admin".to_string()), + Node::new("node03".to_string(), 22, "admin".to_string()), + Node::new("node04".to_string(), 22, "admin".to_string()), + Node::new("node05".to_string(), 22, "admin".to_string()), + ]; + + // Filter using zero-padded hostlist expression + let result = filter_nodes_with_hostlist(nodes, "node[01-03]").unwrap(); + + assert_eq!(result.len(), 3); + assert!(result.iter().any(|n| n.host == "node01")); + assert!(result.iter().any(|n| n.host == "node02")); + assert!(result.iter().any(|n| n.host == "node03")); + } + + #[test] + fn test_exclude_nodes_with_hostlist_cartesian_product() { + let nodes = vec![ + Node::new("rack1-node1".to_string(), 22, "admin".to_string()), + Node::new("rack1-node2".to_string(), 22, "admin".to_string()), + Node::new("rack2-node1".to_string(), 22, "admin".to_string()), + Node::new("rack2-node2".to_string(), 22, "admin".to_string()), + Node::new("rack3-node1".to_string(), 22, "admin".to_string()), + ]; + + // Exclude using cartesian product hostlist expression + let patterns = vec!["rack[1-2]-node[1-2]".to_string()]; + let result = exclude_nodes_with_hostlist(nodes, &patterns).unwrap(); + + assert_eq!(result.len(), 1); + assert_eq!(result[0].host, "rack3-node1"); + } } diff --git a/src/cli/bssh.rs b/src/cli/bssh.rs index 129a86ee..53677643 100644 --- a/src/cli/bssh.rs +++ b/src/cli/bssh.rs @@ -23,7 +23,7 @@ use std::path::PathBuf; before_help = "\n\nBroadcast SSH - Parallel command execution across cluster nodes", about = "Broadcast SSH - SSH-compatible parallel command execution tool", long_about = "bssh is a high-performance SSH client with parallel execution capabilities.\nIt can be used as a drop-in replacement for SSH (single host) or as a powerful cluster management tool (multiple hosts).\n\nThe tool provides secure file transfer using SFTP and supports SSH keys, SSH agent, and password authentication.\nIt 
automatically detects Backend.AI multi-node session environments.\n\nOutput Modes:\n- TUI Mode (default): Interactive terminal UI with real-time monitoring (auto-enabled in terminals)\n- Stream Mode (--stream): Real-time output with [node] prefixes\n- File Mode (--output-dir): Save per-node output to timestamped files\n- Normal Mode: Traditional output after all nodes complete\n\nSSH Configuration Support:\n- Reads standard SSH config files (defaulting to ~/.ssh/config)\n- Supports Host patterns, HostName, User, Port, IdentityFile, StrictHostKeyChecking\n- ProxyJump, and many other SSH configuration directives\n- CLI arguments override SSH config values following SSH precedence rules", - after_help = "EXAMPLES:\n SSH Mode:\n bssh user@host # Interactive shell\n bssh admin@server.com \"uptime\" # Execute command\n bssh -p 2222 -i ~/.ssh/key user@host # Custom port and key\n bssh -F ~/.ssh/myconfig webserver # Use custom SSH config\n\n Port Forwarding:\n bssh -L 8080:example.com:80 user@host # Local forward: localhost:8080 → example.com:80\n bssh -R 8080:localhost:80 user@host # Remote forward: remote:8080 → localhost:80\n bssh -D 1080 user@host # SOCKS5 proxy on localhost:1080\n bssh -L 3306:db:3306 -R 80:web:80 user@host # Multiple forwards\n bssh -D *:1080/4 user@host # SOCKS4 proxy on all interfaces\n\n Multi-Server Mode:\n bssh -C production \"systemctl status\" # Execute on cluster (TUI mode auto-enabled)\n bssh -H \"web1,web2,web3\" \"df -h\" # Execute on multiple hosts\n bssh -H \"web1,web2,web3\" -f \"web1\" \"df -h\" # Filter to web1 only\n bssh -C production -f \"web*\" \"uptime\" # Filter cluster nodes\n bssh --parallel 20 -H web* \"apt update\" # Increase parallelism\n\n Host Exclusion (--exclude):\n bssh -H \"node1,node2,node3\" --exclude \"node2\" \"uptime\" # Exclude single host\n bssh -C production --exclude \"web1,web2\" \"apt update\" # Exclude multiple hosts\n bssh -C production --exclude \"db*\" \"systemctl restart\" # Exclude with wildcard 
pattern\n bssh -C production --exclude \"*-backup\" \"df -h\" # Exclude backup nodes\n\n Fail-Fast Mode (pdsh -k compatible):\n bssh -k -H \"web1,web2,web3\" \"deploy.sh\" # Stop on first failure\n bssh --fail-fast -C prod \"apt upgrade\" # Critical deployment - stop if any node fails\n bssh -k --require-all-success -C prod cmd # Fail-fast + require all success\n\n Output Modes:\n bssh -C prod \"apt-get update\" # TUI mode (default, interactive monitoring)\n bssh -C prod --stream \"tail -f log\" # Stream mode (real-time with [node] prefixes)\n bssh -C prod --output-dir ./logs \"ps\" # File mode (save to timestamped files)\n bssh -C prod \"uptime\" | tee log.txt # Normal mode (auto-detected when piped)\n\n Batch Mode (Ctrl+C Handling):\n bssh -C prod \"long-running-command\" # Default: first Ctrl+C shows status, second terminates\n bssh -C prod -b \"long-command\" # Batch mode: single Ctrl+C terminates immediately\n bssh -H nodes --batch --stream \"cmd\" # Useful for CI/CD and non-interactive scripts\n\n TUI Mode Controls (when in TUI):\n 1-9 Jump to node detail view\n s Enter split view (2-4 nodes)\n d Enter diff view (compare nodes)\n f Toggle auto-scroll\n ↑/↓ Scroll output\n ←/→ Switch nodes\n Esc Return to summary\n ? 
Show help\n q Quit\n\n File Operations:\n bssh -C staging upload file.txt /tmp/ # Upload to cluster\n bssh -H host1,host2 download /etc/hosts ./backups/\n\n Other Commands:\n bssh list # List configured clusters\n bssh -C production ping # Test connectivity\n bssh -H hosts interactive # Interactive mode\n\n SSH Config Example (~/.ssh/config):\n Host web*\n HostName web.example.com\n User webuser\n Port 2222\n IdentityFile ~/.ssh/web_key\n StrictHostKeyChecking yes\n\nDeveloped and maintained as part of the Backend.AI project.\nFor more information: https://github.com/lablup/bssh" + after_help = "EXAMPLES:\n SSH Mode:\n bssh user@host # Interactive shell\n bssh admin@server.com \"uptime\" # Execute command\n bssh -p 2222 -i ~/.ssh/key user@host # Custom port and key\n bssh -F ~/.ssh/myconfig webserver # Use custom SSH config\n\n Port Forwarding:\n bssh -L 8080:example.com:80 user@host # Local forward: localhost:8080 -> example.com:80\n bssh -R 8080:localhost:80 user@host # Remote forward: remote:8080 -> localhost:80\n bssh -D 1080 user@host # SOCKS5 proxy on localhost:1080\n bssh -L 3306:db:3306 -R 80:web:80 user@host # Multiple forwards\n bssh -D *:1080/4 user@host # SOCKS4 proxy on all interfaces\n\n Multi-Server Mode:\n bssh -C production \"systemctl status\" # Execute on cluster (TUI mode auto-enabled)\n bssh -H \"web1,web2,web3\" \"df -h\" # Execute on multiple hosts\n bssh -H \"web1,web2,web3\" -f \"web1\" \"df -h\" # Filter to web1 only\n bssh -C production -f \"web*\" \"uptime\" # Filter cluster nodes\n bssh --parallel 20 -H web* \"apt update\" # Increase parallelism\n\n Hostlist Expression (pdsh-style range expansion):\n bssh -H \"node[1-5]\" \"uptime\" # Expands to node1, node2, node3, node4, node5\n bssh -H \"node[01-03]\" \"df -h\" # Zero-padded: node01, node02, node03\n bssh -H \"node[1,3,5]\" \"ps aux\" # Specific values: node1, node3, node5\n bssh -H \"node[1-3,7,9-10]\" \"uptime\" # Mixed: node1-3, node7, node9-10\n bssh -H \"rack[1-2]-node[1-3]\" 
\"uptime\" # Cartesian product: 6 hosts\n bssh -H \"web[1-3].example.com\" \"uptime\" # With domain suffix\n bssh -H \"admin@db[01-03]:5432\" \"psql\" # With user and port\n bssh -H \"^/etc/hosts.cluster\" \"uptime\" # Read hosts from file\n\n Host Exclusion (--exclude):\n bssh -H \"node1,node2,node3\" --exclude \"node2\" \"uptime\" # Exclude single host\n bssh -C production --exclude \"web1,web2\" \"apt update\" # Exclude multiple hosts\n bssh -C production --exclude \"db*\" \"systemctl restart\" # Exclude with wildcard pattern\n bssh -H \"node[1-10]\" --exclude \"node[3-5]\" \"uptime\" # Exclude with hostlist expression\n\n Fail-Fast Mode (pdsh -k compatible):\n bssh -k -H \"web[1-3]\" \"deploy.sh\" # Stop on first failure\n bssh --fail-fast -C prod \"apt upgrade\" # Critical deployment - stop if any node fails\n bssh -k --require-all-success -C prod cmd # Fail-fast + require all success\n\n Output Modes:\n bssh -C prod \"apt-get update\" # TUI mode (default, interactive monitoring)\n bssh -C prod --stream \"tail -f log\" # Stream mode (real-time with [node] prefixes)\n bssh -C prod --output-dir ./logs \"ps\" # File mode (save to timestamped files)\n bssh -C prod \"uptime\" | tee log.txt # Normal mode (auto-detected when piped)\n\n Batch Mode (Ctrl+C Handling):\n bssh -C prod \"long-running-command\" # Default: first Ctrl+C shows status, second terminates\n bssh -C prod -b \"long-command\" # Batch mode: single Ctrl+C terminates immediately\n bssh -H nodes --batch --stream \"cmd\" # Useful for CI/CD and non-interactive scripts\n\n TUI Mode Controls (when in TUI):\n 1-9 Jump to node detail view\n s Enter split view (2-4 nodes)\n d Enter diff view (compare nodes)\n f Toggle auto-scroll\n Up/Down Scroll output\n Left/Right Switch nodes\n Esc Return to summary\n ? 
Show help\n q Quit\n\n File Operations:\n bssh -C staging upload file.txt /tmp/ # Upload to cluster\n bssh -H host1,host2 download /etc/hosts ./backups/\n\n Other Commands:\n bssh list # List configured clusters\n bssh -C production ping # Test connectivity\n bssh -H hosts interactive # Interactive mode\n\n SSH Config Example (~/.ssh/config):\n Host web*\n HostName web.example.com\n User webuser\n Port 2222\n IdentityFile ~/.ssh/web_key\n StrictHostKeyChecking yes\n\nDeveloped and maintained as part of the Backend.AI project.\nFor more information: https://github.com/lablup/bssh" )] pub struct Cli { /// SSH destination in format: [user@]hostname[:port] or ssh://[user@]hostname[:port] @@ -38,21 +38,21 @@ pub struct Cli { short = 'H', long, value_delimiter = ',', - help = "Comma-separated list of hosts in [user@]hostname[:port] format\nExamples: 'host1,host2' or 'user1@host1:2222,user2@host2'\nDefault user and port from config or current environment will be used if not specified" + help = "Comma-separated list of hosts with hostlist expansion support\nFormat: [user@]hostname[:port] with optional range expressions\nRange expressions:\n node[1-5] -> node1, node2, node3, node4, node5\n node[01-03] -> node01, node02, node03 (zero-padded)\n node[1,3,5] -> node1, node3, node5\n rack[1-2]-node[1-3] -> 6 hosts (cartesian product)\n ^/path/to/file -> read hosts from file\nExamples:\n 'web[1-3].example.com' for web1-web3\n 'admin@db[01-03]:5432' for db01-db03 with user and port" )] pub hosts: Option>, #[arg( short = 'f', long = "filter", - help = "Filter hosts by pattern (supports wildcards like 'web*')\nUse with -H or -C to execute on a subset of hosts\nExamples: 'web*' matches web01, web02, etc." + help = "Filter hosts by pattern (supports wildcards and hostlist expressions)\nUse with -H or -C to execute on a subset of hosts\nExamples:\n 'web*' -> matches web01, web02, etc. 
(glob)\n 'node[1-3]' -> matches node1, node2, node3 (hostlist)" )] pub filter: Option, #[arg( long = "exclude", value_delimiter = ',', - help = "Exclude hosts from target list (comma-separated)\nSupports wildcard patterns: '*' (any chars), '?' (single char), '[abc]' (char set)\nMatching: patterns with wildcards use glob matching; plain patterns use substring matching\nApplied after --filter option" + help = "Exclude hosts from target list (comma-separated)\nSupports wildcards and hostlist expressions:\n Wildcards: '*' (any chars), '?' (single char), '[abc]' (char set)\n Hostlist: 'node[1-5]', 'node[1,3,5]', 'rack[1-2]-node[1-3]'\nMatching: glob for wildcards, exact for hostlist expansions\nApplied after --filter option" )] pub exclude: Option>, diff --git a/src/hostlist/error.rs b/src/hostlist/error.rs new file mode 100644 index 00000000..f9d9a6f0 --- /dev/null +++ b/src/hostlist/error.rs @@ -0,0 +1,107 @@ +// Copyright 2025 Lablup Inc. and Jeongkyu Shin +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! 
Error types for hostlist parsing and expansion + +use thiserror::Error; + +/// Errors that can occur during hostlist parsing and expansion +#[derive(Debug, Error, PartialEq, Eq)] +pub enum HostlistError { + /// Empty bracket expression (e.g., `node[]`) + #[error("empty bracket expression in '{expression}'")] + EmptyBracket { expression: String }, + + /// Unclosed bracket (e.g., `node[1-5`) + #[error("unclosed bracket in '{expression}'")] + UnclosedBracket { expression: String }, + + /// Unmatched closing bracket (e.g., `node]1-5[`) + #[error("unmatched closing bracket in '{expression}'")] + UnmatchedBracket { expression: String }, + + /// Invalid range syntax (e.g., `node[a-z]`) + #[error("invalid range syntax '{range}' in '{expression}': {reason}")] + InvalidRange { + expression: String, + range: String, + reason: String, + }, + + /// Reversed range (e.g., `node[5-1]`) + #[error("reversed range '{start}-{end}' in '{expression}' (start must be <= end)")] + ReversedRange { + expression: String, + start: i64, + end: i64, + }, + + /// Range produces too many hosts + #[error( + "range expansion would produce {count} hosts, exceeding limit of {limit} in '{expression}'" + )] + RangeTooLarge { + expression: String, + count: usize, + limit: usize, + }, + + /// Invalid number in range + #[error("invalid number '{value}' in range expression '{expression}'")] + InvalidNumber { expression: String, value: String }, + + /// File not found for ^ prefix + #[error("hostfile not found: {path}")] + FileNotFound { path: String }, + + /// Error reading hostfile + #[error("failed to read hostfile '{path}': {reason}")] + FileReadError { path: String, reason: String }, + + /// Nested brackets (e.g., `node[[1-2]]`) + #[error("nested brackets are not supported in '{expression}'")] + NestedBrackets { expression: String }, + + /// IPv6 address disambiguation failure + #[error("cannot distinguish IPv6 literal from range expression in '{expression}'")] + Ipv6Ambiguity { expression: String }, +} 
+ +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_error_display() { + let err = HostlistError::EmptyBracket { + expression: "node[]".to_string(), + }; + assert_eq!(err.to_string(), "empty bracket expression in 'node[]'"); + + let err = HostlistError::UnclosedBracket { + expression: "node[1-5".to_string(), + }; + assert_eq!(err.to_string(), "unclosed bracket in 'node[1-5'"); + + let err = HostlistError::ReversedRange { + expression: "node[5-1]".to_string(), + start: 5, + end: 1, + }; + assert_eq!( + err.to_string(), + "reversed range '5-1' in 'node[5-1]' (start must be <= end)" + ); + } +} diff --git a/src/hostlist/expander.rs b/src/hostlist/expander.rs new file mode 100644 index 00000000..ffc0c6fe --- /dev/null +++ b/src/hostlist/expander.rs @@ -0,0 +1,512 @@ +// Copyright 2025 Lablup Inc. and Jeongkyu Shin +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Hostlist expansion implementation +//! +//! This module expands parsed host patterns into lists of hostnames +//! using cartesian product for multiple range expressions. + +use super::error::HostlistError; +use super::parser::{parse_host_pattern, PatternSegment}; + +/// Maximum number of hosts that can be generated from a single pattern +const MAX_EXPANSION_SIZE: usize = 100_000; + +/// Expand a hostlist expression into a list of hostnames +/// +/// # Arguments +/// +/// * `expr` - The hostlist expression to expand +/// +/// # Returns +/// +/// A vector of expanded hostnames. 
+/// +/// # Examples +/// +/// ```rust +/// use bssh::hostlist::expand_hostlist; +/// +/// // Simple range +/// let hosts = expand_hostlist("node[1-3]").unwrap(); +/// assert_eq!(hosts, vec!["node1", "node2", "node3"]); +/// +/// // Zero-padded +/// let hosts = expand_hostlist("server[01-03]").unwrap(); +/// assert_eq!(hosts, vec!["server01", "server02", "server03"]); +/// +/// // Cartesian product +/// let hosts = expand_hostlist("rack[1-2]-node[1-2]").unwrap(); +/// assert_eq!(hosts, vec!["rack1-node1", "rack1-node2", "rack2-node1", "rack2-node2"]); +/// ``` +pub fn expand_hostlist(expr: &str) -> Result, HostlistError> { + if expr.is_empty() { + return Ok(Vec::new()); + } + + let pattern = parse_host_pattern(expr)?; + + // Check expansion size before generating + let expansion_count = pattern.expansion_count(); + if expansion_count > MAX_EXPANSION_SIZE { + return Err(HostlistError::RangeTooLarge { + expression: expr.to_string(), + count: expansion_count, + limit: MAX_EXPANSION_SIZE, + }); + } + + // If no ranges, just concatenate literals + if !pattern.has_ranges() { + let host: String = pattern + .segments + .iter() + .filter_map(|s| match s { + PatternSegment::Literal(lit) => Some(lit.as_str()), + PatternSegment::Range(_) => None, + }) + .collect(); + return Ok(if host.is_empty() { + Vec::new() + } else { + vec![host] + }); + } + + // Expand using cartesian product + expand_segments(&pattern.segments) +} + +/// Expand pattern segments into a list of hostnames using cartesian product +fn expand_segments(segments: &[PatternSegment]) -> Result, HostlistError> { + if segments.is_empty() { + return Ok(vec![String::new()]); + } + + // Start with a single empty string + let mut results = vec![String::new()]; + + for segment in segments { + match segment { + PatternSegment::Literal(lit) => { + // Append literal to all current results + for result in &mut results { + result.push_str(lit); + } + } + PatternSegment::Range(range_expr) => { + // Expand with all values from 
the range + let values = range_expr.values(); + + // Use checked multiplication to prevent integer overflow + let new_capacity = results.len().checked_mul(values.len()).ok_or_else(|| { + HostlistError::RangeTooLarge { + expression: "cartesian product".to_string(), + count: usize::MAX, + limit: MAX_EXPANSION_SIZE, + } + })?; + + if new_capacity > MAX_EXPANSION_SIZE { + return Err(HostlistError::RangeTooLarge { + expression: "cartesian product".to_string(), + count: new_capacity, + limit: MAX_EXPANSION_SIZE, + }); + } + + let mut new_results = Vec::with_capacity(new_capacity); + + for result in &results { + for value in &values { + let formatted = range_expr.format_value(*value); + let mut new_result = result.clone(); + new_result.push_str(&formatted); + new_results.push(new_result); + } + } + + results = new_results; + } + } + } + + Ok(results) +} + +/// Expand a host specification that may include user@ prefix and :port suffix +/// +/// This function handles the full host specification format: +/// `[user@]hostpattern[:port]` +/// +/// # Arguments +/// +/// * `spec` - The full host specification +/// +/// # Returns +/// +/// A vector of expanded host specifications preserving user and port. 
+/// +/// # Examples +/// +/// ```rust +/// use bssh::hostlist::expander::expand_host_spec; +/// +/// let hosts = expand_host_spec("admin@web[1-2].example.com:22").unwrap(); +/// assert_eq!(hosts, vec![ +/// "admin@web1.example.com:22", +/// "admin@web2.example.com:22" +/// ]); +/// ``` +pub fn expand_host_spec(spec: &str) -> Result, HostlistError> { + if spec.is_empty() { + return Ok(Vec::new()); + } + + // Parse user prefix + let (user_prefix, rest) = if let Some(at_pos) = spec.find('@') { + // Check if @ is before any [ to avoid matching @ in expressions + let bracket_pos = spec.find('['); + if bracket_pos.is_none() || at_pos < bracket_pos.unwrap() { + let user = &spec[..=at_pos]; // includes @ + let rest = &spec[at_pos + 1..]; + (Some(user.to_string()), rest) + } else { + (None, spec) + } + } else { + (None, spec) + }; + + // Parse port suffix (find last : that's not inside brackets) + let (host_pattern, port_suffix) = parse_port_suffix(rest)?; + + // Expand the host pattern + let expanded_hosts = expand_hostlist(host_pattern)?; + + // Reconstruct with user and port + let results: Vec = expanded_hosts + .into_iter() + .map(|host| { + let mut result = String::new(); + if let Some(ref user) = user_prefix { + result.push_str(user); + } + result.push_str(&host); + if let Some(ref port) = port_suffix { + result.push_str(port); + } + result + }) + .collect(); + + Ok(results) +} + +/// Parse port suffix from a host pattern, being careful about brackets +fn parse_port_suffix(spec: &str) -> Result<(&str, Option), HostlistError> { + // Find the last : that's not inside brackets + let mut bracket_depth = 0; + let mut last_colon_outside = None; + + for (i, ch) in spec.char_indices() { + match ch { + '[' => bracket_depth += 1, + ']' => { + if bracket_depth > 0 { + bracket_depth -= 1; + } + } + ':' if bracket_depth == 0 => { + last_colon_outside = Some(i); + } + _ => {} + } + } + + if let Some(colon_pos) = last_colon_outside { + let potential_port = &spec[colon_pos + 1..]; + 
// Check if it looks like a port (all digits) + if !potential_port.is_empty() && potential_port.chars().all(|c| c.is_ascii_digit()) { + let host_pattern = &spec[..colon_pos]; + let port_suffix = Some(format!(":{}", potential_port)); + return Ok((host_pattern, port_suffix)); + } + } + + Ok((spec, None)) +} + +/// Expand multiple comma-separated host specifications +/// +/// # Arguments +/// +/// * `specs` - Comma-separated host specifications +/// +/// # Returns +/// +/// A vector of all expanded host specifications, deduplicated. +pub fn expand_host_specs(specs: &str) -> Result, HostlistError> { + use super::split_patterns; + + if specs.is_empty() { + return Ok(Vec::new()); + } + + let patterns = split_patterns(specs)?; + let mut all_hosts = Vec::new(); + + for pattern in patterns { + let pattern = pattern.trim(); + if pattern.is_empty() { + continue; + } + + let expanded = expand_host_spec(pattern)?; + all_hosts.extend(expanded); + } + + // Deduplicate while preserving order + let mut seen = std::collections::HashSet::new(); + let mut result = Vec::new(); + for host in all_hosts { + if seen.insert(host.clone()) { + result.push(host); + } + } + + Ok(result) +} + +#[cfg(test)] +mod tests { + use super::*; + + // Basic expansion tests + #[test] + fn test_expand_simple_range() { + let hosts = expand_hostlist("node[1-3]").unwrap(); + assert_eq!(hosts, vec!["node1", "node2", "node3"]); + } + + #[test] + fn test_expand_zero_padded_range() { + let hosts = expand_hostlist("node[01-05]").unwrap(); + assert_eq!( + hosts, + vec!["node01", "node02", "node03", "node04", "node05"] + ); + } + + #[test] + fn test_expand_comma_separated() { + let hosts = expand_hostlist("node[1,3,5]").unwrap(); + assert_eq!(hosts, vec!["node1", "node3", "node5"]); + } + + #[test] + fn test_expand_mixed_range() { + let hosts = expand_hostlist("node[1-3,7,9-10]").unwrap(); + assert_eq!( + hosts, + vec!["node1", "node2", "node3", "node7", "node9", "node10"] + ); + } + + #[test] + fn 
test_expand_cartesian_product() { + let hosts = expand_hostlist("rack[1-2]-node[1-3]").unwrap(); + assert_eq!( + hosts, + vec![ + "rack1-node1", + "rack1-node2", + "rack1-node3", + "rack2-node1", + "rack2-node2", + "rack2-node3" + ] + ); + } + + #[test] + fn test_expand_with_domain() { + let hosts = expand_hostlist("web[1-3].example.com").unwrap(); + assert_eq!( + hosts, + vec!["web1.example.com", "web2.example.com", "web3.example.com"] + ); + } + + #[test] + fn test_expand_no_range() { + let hosts = expand_hostlist("simple.host.com").unwrap(); + assert_eq!(hosts, vec!["simple.host.com"]); + } + + #[test] + fn test_expand_empty() { + let hosts = expand_hostlist("").unwrap(); + assert!(hosts.is_empty()); + } + + #[test] + fn test_expand_single_value_range() { + let hosts = expand_hostlist("node[5]").unwrap(); + assert_eq!(hosts, vec!["node5"]); + } + + #[test] + fn test_expand_three_digit_padding() { + let hosts = expand_hostlist("node[001-003]").unwrap(); + assert_eq!(hosts, vec!["node001", "node002", "node003"]); + } + + // Host spec expansion tests + #[test] + fn test_expand_host_spec_with_user() { + let hosts = expand_host_spec("admin@node[1-2]").unwrap(); + assert_eq!(hosts, vec!["admin@node1", "admin@node2"]); + } + + #[test] + fn test_expand_host_spec_with_port() { + let hosts = expand_host_spec("node[1-2]:22").unwrap(); + assert_eq!(hosts, vec!["node1:22", "node2:22"]); + } + + #[test] + fn test_expand_host_spec_full() { + let hosts = expand_host_spec("admin@web[1-2].example.com:22").unwrap(); + assert_eq!( + hosts, + vec!["admin@web1.example.com:22", "admin@web2.example.com:22"] + ); + } + + #[test] + fn test_expand_host_spec_no_expansion() { + let hosts = expand_host_spec("user@host.com:2222").unwrap(); + assert_eq!(hosts, vec!["user@host.com:2222"]); + } + + #[test] + fn test_expand_host_specs_multiple() { + let hosts = expand_host_specs("web[1-2],db[1-2]").unwrap(); + assert_eq!(hosts, vec!["web1", "web2", "db1", "db2"]); + } + + #[test] + fn 
test_expand_host_specs_with_user_port() { + let hosts = expand_host_specs("admin@web[1-2]:22,root@db[1-2]:3306").unwrap(); + assert_eq!( + hosts, + vec![ + "admin@web1:22", + "admin@web2:22", + "root@db1:3306", + "root@db2:3306" + ] + ); + } + + #[test] + fn test_expand_host_specs_deduplication() { + let hosts = expand_host_specs("node[1-3],node[2-4]").unwrap(); + assert_eq!(hosts, vec!["node1", "node2", "node3", "node4"]); + } + + // Error cases + #[test] + fn test_expand_too_large() { + // This would produce 1000 * 1000 = 1,000,000 hosts + let result = expand_hostlist("a[1-1000]-b[1-1000]"); + assert!(matches!(result, Err(HostlistError::RangeTooLarge { .. }))); + } + + #[test] + fn test_expand_empty_bracket() { + let result = expand_hostlist("node[]"); + assert!(matches!(result, Err(HostlistError::EmptyBracket { .. }))); + } + + #[test] + fn test_expand_reversed_range() { + let result = expand_hostlist("node[5-1]"); + assert!(matches!(result, Err(HostlistError::ReversedRange { .. }))); + } + + #[test] + fn test_expand_invalid_number() { + let result = expand_hostlist("node[a-z]"); + assert!(matches!(result, Err(HostlistError::InvalidNumber { .. 
}))); + } + + // Edge cases + #[test] + fn test_expand_large_but_valid_range() { + let hosts = expand_hostlist("node[1-1000]").unwrap(); + assert_eq!(hosts.len(), 1000); + assert_eq!(hosts[0], "node1"); + assert_eq!(hosts[999], "node1000"); + } + + #[test] + fn test_expand_prefix_only() { + let hosts = expand_hostlist("prefix-[1-2]").unwrap(); + assert_eq!(hosts, vec!["prefix-1", "prefix-2"]); + } + + #[test] + fn test_expand_suffix_only() { + let hosts = expand_hostlist("[1-2]-suffix").unwrap(); + assert_eq!(hosts, vec!["1-suffix", "2-suffix"]); + } + + #[test] + fn test_expand_range_only() { + let hosts = expand_hostlist("[1-3]").unwrap(); + assert_eq!(hosts, vec!["1", "2", "3"]); + } + + #[test] + fn test_expand_complex_domain() { + let hosts = expand_hostlist("app[1-2].prod.us-east-1.example.com").unwrap(); + assert_eq!( + hosts, + vec![ + "app1.prod.us-east-1.example.com", + "app2.prod.us-east-1.example.com" + ] + ); + } + + #[test] + fn test_port_suffix_parsing() { + // Test that port suffix is correctly separated + let (host, port) = parse_port_suffix("node[1-3]:22").unwrap(); + assert_eq!(host, "node[1-3]"); + assert_eq!(port, Some(":22".to_string())); + + // No port + let (host, port) = parse_port_suffix("node[1-3]").unwrap(); + assert_eq!(host, "node[1-3]"); + assert_eq!(port, None); + + // Domain with port + let (host, port) = parse_port_suffix("node[1-3].example.com:2222").unwrap(); + assert_eq!(host, "node[1-3].example.com"); + assert_eq!(port, Some(":2222".to_string())); + } +} diff --git a/src/hostlist/mod.rs b/src/hostlist/mod.rs new file mode 100644 index 00000000..c530bd61 --- /dev/null +++ b/src/hostlist/mod.rs @@ -0,0 +1,300 @@ +// Copyright 2025 Lablup Inc. and Jeongkyu Shin +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Hostlist expression support for pdsh-style range expansion +//! +//! This module provides parsing and expansion of hostlist expressions, +//! allowing compact specification of multiple hosts using range notation. +//! +//! # Syntax +//! +//! The hostlist expression syntax supports: +//! - Simple range: `node[1-5]` -> `node1, node2, node3, node4, node5` +//! - Zero-padded range: `node[01-05]` -> `node01, node02, node03, node04, node05` +//! - Comma-separated values: `node[1,3,5]` -> `node1, node3, node5` +//! - Mixed ranges and values: `node[1-3,7,9-10]` -> 6 hosts +//! - Multiple ranges (cartesian product): `rack[1-2]-node[1-3]` -> 6 hosts +//! - Domain suffix: `web[1-3].example.com` -> 3 hosts +//! - File input: `^/path/to/file` -> read hosts from file +//! +//! # Examples +//! +//! ```rust +//! use bssh::hostlist::expand_hostlist; +//! +//! // Simple range expansion +//! let hosts = expand_hostlist("node[1-3]").unwrap(); +//! assert_eq!(hosts, vec!["node1", "node2", "node3"]); +//! +//! // Zero-padded expansion +//! let hosts = expand_hostlist("server[01-03]").unwrap(); +//! assert_eq!(hosts, vec!["server01", "server02", "server03"]); +//! +//! // Multiple ranges (cartesian product) +//! let hosts = expand_hostlist("rack[1-2]-node[1-2]").unwrap(); +//! assert_eq!(hosts, vec!["rack1-node1", "rack1-node2", "rack2-node1", "rack2-node2"]); +//! 
``` + +mod error; +pub mod expander; +mod parser; + +pub use error::HostlistError; +pub use expander::{expand_host_spec, expand_host_specs, expand_hostlist}; +pub use parser::{parse_host_pattern, parse_hostfile, HostPattern}; + +/// Check if a pattern is a hostlist expression (contains numeric range brackets) +/// +/// Hostlist expressions have brackets containing numeric ranges like [1-5], [01-05], [1,2,3] +/// Glob patterns have brackets containing characters like [abc], [a-z], [!xyz] +pub fn is_hostlist_expression(pattern: &str) -> bool { + // A hostlist expression has [...] with numbers/ranges inside + if !pattern.contains('[') || !pattern.contains(']') { + return false; + } + + // Find bracket content and check if it looks like a hostlist range + let mut in_bracket = false; + let mut bracket_content = String::new(); + + for ch in pattern.chars() { + match ch { + '[' if !in_bracket => { + in_bracket = true; + bracket_content.clear(); + } + ']' if in_bracket => { + // Check if bracket content looks like a hostlist range + if looks_like_hostlist_range(&bracket_content) { + return true; + } + in_bracket = false; + } + _ if in_bracket => { + bracket_content.push(ch); + } + _ => {} + } + } + + false +} + +/// Check if bracket content looks like a hostlist numeric range +pub fn looks_like_hostlist_range(content: &str) -> bool { + if content.is_empty() { + return false; + } + + // Hostlist ranges are numeric: 1-5, 01-05, 1,2,3, 1-3,5-7 + // Glob patterns have letters: abc, a-z, !xyz + for part in content.split(',') { + let part = part.trim(); + if part.is_empty() { + continue; + } + + // Check if it's a range (contains -) + if part.contains('-') { + let parts: Vec<&str> = part.splitn(2, '-').collect(); + if parts.len() == 2 { + // Both parts should be numeric for hostlist + if parts[0].chars().all(|c| c.is_ascii_digit()) + && parts[1].chars().all(|c| c.is_ascii_digit()) + { + return true; + } + } + } else { + // Single value should be numeric for hostlist + if 
part.chars().all(|c| c.is_ascii_digit()) { + return true; + } + } + } + + false +} + +/// Expand a comma-separated list of host patterns +/// +/// This function handles multiple patterns separated by commas, +/// expanding each pattern and deduplicating the results. +/// +/// # Arguments +/// +/// * `expr` - A comma-separated list of host patterns +/// +/// # Returns +/// +/// A vector of expanded hostnames, deduplicated and in order. +/// +/// # Examples +/// +/// ```rust +/// use bssh::hostlist::expand_hostlist_patterns; +/// +/// let hosts = expand_hostlist_patterns("web[1-2],db[1-2]").unwrap(); +/// assert_eq!(hosts, vec!["web1", "web2", "db1", "db2"]); +/// ``` +pub fn expand_hostlist_patterns(expr: &str) -> Result, HostlistError> { + if expr.is_empty() { + return Ok(Vec::new()); + } + + // Handle file input with ^ prefix + if let Some(path) = expr.strip_prefix('^') { + return parse_hostfile(std::path::Path::new(path)); + } + + // Split by comma, but be careful about commas inside brackets + let patterns = split_patterns(expr)?; + + let mut all_hosts = Vec::new(); + for pattern in patterns { + let pattern = pattern.trim(); + if pattern.is_empty() { + continue; + } + + // Handle file input within comma-separated list + if let Some(path) = pattern.strip_prefix('^') { + let file_hosts = parse_hostfile(std::path::Path::new(path))?; + all_hosts.extend(file_hosts); + } else { + let expanded = expand_hostlist(pattern)?; + all_hosts.extend(expanded); + } + } + + // Deduplicate while preserving order + deduplicate_hosts(all_hosts) +} + +/// Split a hostlist expression by commas, respecting bracket boundaries +fn split_patterns(expr: &str) -> Result, HostlistError> { + let mut patterns = Vec::new(); + let mut current = String::new(); + let mut bracket_depth = 0; + + for ch in expr.chars() { + match ch { + '[' => { + bracket_depth += 1; + current.push(ch); + } + ']' => { + if bracket_depth == 0 { + return Err(HostlistError::UnmatchedBracket { + expression: 
expr.to_string(), + }); + } + bracket_depth -= 1; + current.push(ch); + } + ',' if bracket_depth == 0 => { + if !current.is_empty() { + patterns.push(current); + current = String::new(); + } + } + _ => current.push(ch), + } + } + + if bracket_depth != 0 { + return Err(HostlistError::UnclosedBracket { + expression: expr.to_string(), + }); + } + + if !current.is_empty() { + patterns.push(current); + } + + Ok(patterns) +} + +/// Deduplicate hosts while preserving original order +fn deduplicate_hosts(hosts: Vec) -> Result, HostlistError> { + let mut seen = std::collections::HashSet::new(); + let mut result = Vec::new(); + + for host in hosts { + if seen.insert(host.clone()) { + result.push(host); + } + } + + Ok(result) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_expand_hostlist_patterns_empty() { + let result = expand_hostlist_patterns("").unwrap(); + assert!(result.is_empty()); + } + + #[test] + fn test_expand_hostlist_patterns_single() { + let result = expand_hostlist_patterns("node[1-3]").unwrap(); + assert_eq!(result, vec!["node1", "node2", "node3"]); + } + + #[test] + fn test_expand_hostlist_patterns_multiple() { + let result = expand_hostlist_patterns("web[1-2],db[1-2]").unwrap(); + assert_eq!(result, vec!["web1", "web2", "db1", "db2"]); + } + + #[test] + fn test_expand_hostlist_patterns_with_whitespace() { + let result = expand_hostlist_patterns("web[1-2], db[1-2]").unwrap(); + assert_eq!(result, vec!["web1", "web2", "db1", "db2"]); + } + + #[test] + fn test_expand_hostlist_patterns_deduplication() { + let result = expand_hostlist_patterns("node[1-3],node[2-4]").unwrap(); + assert_eq!(result, vec!["node1", "node2", "node3", "node4"]); + } + + #[test] + fn test_split_patterns_simple() { + let patterns = split_patterns("a,b,c").unwrap(); + assert_eq!(patterns, vec!["a", "b", "c"]); + } + + #[test] + fn test_split_patterns_with_brackets() { + let patterns = split_patterns("node[1,2,3],web[1-3]").unwrap(); + assert_eq!(patterns, 
vec!["node[1,2,3]", "web[1-3]"]); + } + + #[test] + fn test_split_patterns_unclosed_bracket() { + let result = split_patterns("node[1-3"); + assert!(result.is_err()); + } + + #[test] + fn test_split_patterns_unmatched_bracket() { + let result = split_patterns("node]1-3["); + assert!(result.is_err()); + } +} diff --git a/src/hostlist/parser.rs b/src/hostlist/parser.rs new file mode 100644 index 00000000..364acce9 --- /dev/null +++ b/src/hostlist/parser.rs @@ -0,0 +1,602 @@ +// Copyright 2025 Lablup Inc. and Jeongkyu Shin +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Parser for hostlist expressions +//! +//! This module parses hostlist expressions into structured representations +//! that can be expanded into lists of hostnames. 
+ +use super::error::HostlistError; +use std::path::Path; + +/// Represents a parsed range item (single value or range) +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum RangeItem { + /// A single numeric value + Single(i64), + /// A range from start to end (inclusive) + Range { start: i64, end: i64 }, +} + +/// Represents a parsed range expression with padding information +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct RangeExpression { + /// The range items (values and ranges) + pub items: Vec, + /// The minimum padding width (determined from zero-padded numbers) + pub padding: usize, +} + +/// Represents a segment of a host pattern (literal text or range expression) +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum PatternSegment { + /// Literal text (e.g., "node", ".example.com") + Literal(String), + /// Range expression (e.g., [1-5], [01-05]) + Range(RangeExpression), +} + +/// Represents a complete host pattern +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct HostPattern { + /// The segments making up this pattern + pub segments: Vec, +} + +impl HostPattern { + /// Check if this pattern contains any range expressions + pub fn has_ranges(&self) -> bool { + self.segments + .iter() + .any(|s| matches!(s, PatternSegment::Range(_))) + } + + /// Get the expected expansion count (product of all range sizes) + pub fn expansion_count(&self) -> usize { + let mut count = 1usize; + for segment in &self.segments { + if let PatternSegment::Range(range) = segment { + count = count.saturating_mul(range.item_count()); + } + } + count + } +} + +impl RangeExpression { + /// Count the total number of values this range expression represents + pub fn item_count(&self) -> usize { + self.items.iter().map(|item| item.count()).sum() + } + + /// Generate all values from this range expression + pub fn values(&self) -> Vec { + let mut result = Vec::new(); + for item in &self.items { + match item { + RangeItem::Single(v) => result.push(*v), + RangeItem::Range { start, end } => { + 
for v in *start..=*end { + result.push(v); + } + } + } + } + result + } + + /// Format a value with the appropriate padding + pub fn format_value(&self, value: i64) -> String { + if self.padding > 0 { + format!("{:0>width$}", value, width = self.padding) + } else { + value.to_string() + } + } +} + +impl RangeItem { + /// Count the number of values this item represents + pub fn count(&self) -> usize { + match self { + RangeItem::Single(_) => 1, + RangeItem::Range { start, end } => { + if end >= start { + (end - start + 1) as usize + } else { + 0 + } + } + } + } +} + +/// Parse a host pattern string into a HostPattern structure +/// +/// # Arguments +/// +/// * `pattern` - The host pattern string to parse +/// +/// # Returns +/// +/// A parsed HostPattern structure or an error. +/// +/// # Examples +/// +/// ```rust +/// use bssh::hostlist::parse_host_pattern; +/// +/// let pattern = parse_host_pattern("node[1-3]").unwrap(); +/// assert!(pattern.has_ranges()); +/// assert_eq!(pattern.expansion_count(), 3); +/// ``` +pub fn parse_host_pattern(pattern: &str) -> Result { + if pattern.is_empty() { + return Ok(HostPattern { + segments: Vec::new(), + }); + } + + let mut segments = Vec::new(); + let mut current_literal = String::new(); + let mut chars = pattern.chars().peekable(); + let mut bracket_depth = 0; + + while let Some(ch) = chars.next() { + match ch { + '[' => { + if bracket_depth > 0 { + return Err(HostlistError::NestedBrackets { + expression: pattern.to_string(), + }); + } + + // Check for IPv6 literal (starts with digit or colon after [) + if let Some(&next_ch) = chars.peek() { + if is_ipv6_start(next_ch, &chars) { + // This might be an IPv6 literal, collect until matching ] + current_literal.push(ch); + continue; + } + } + + // Save any accumulated literal + if !current_literal.is_empty() { + segments.push(PatternSegment::Literal(current_literal.clone())); + current_literal.clear(); + } + + // Parse range expression + bracket_depth = 1; + let mut range_content 
= String::new(); + + for inner_ch in chars.by_ref() { + match inner_ch { + '[' => { + return Err(HostlistError::NestedBrackets { + expression: pattern.to_string(), + }); + } + ']' => { + bracket_depth = 0; + break; + } + _ => range_content.push(inner_ch), + } + } + + if bracket_depth != 0 { + return Err(HostlistError::UnclosedBracket { + expression: pattern.to_string(), + }); + } + + if range_content.is_empty() { + return Err(HostlistError::EmptyBracket { + expression: pattern.to_string(), + }); + } + + let range_expr = parse_range_expression(&range_content, pattern)?; + segments.push(PatternSegment::Range(range_expr)); + } + ']' => { + if bracket_depth == 0 { + return Err(HostlistError::UnmatchedBracket { + expression: pattern.to_string(), + }); + } + bracket_depth -= 1; + } + _ => { + current_literal.push(ch); + } + } + } + + // Save any remaining literal + if !current_literal.is_empty() { + segments.push(PatternSegment::Literal(current_literal)); + } + + Ok(HostPattern { segments }) +} + +/// Check if a character sequence might be the start of an IPv6 literal +fn is_ipv6_start(next_ch: char, _chars: &std::iter::Peekable) -> bool { + // IPv6 literals start with a colon (e.g., [::1] or [2001:db8::1]) + // We use a conservative heuristic: only treat as IPv6 if we see a colon + // This means hex digits like 'a' will be treated as potential hostlist content + // and will fail with InvalidNumber if they're not valid numeric ranges + next_ch == ':' +} + +/// Parse a range expression (content between brackets) +fn parse_range_expression(content: &str, pattern: &str) -> Result { + let mut items = Vec::new(); + let mut max_padding = 0; + + // Split by comma to get individual range items + for item_str in content.split(',') { + let item_str = item_str.trim(); + if item_str.is_empty() { + continue; + } + + // Check if this is a range (contains -) + if let Some(dash_pos) = item_str.find('-') { + // Could be a negative number or a range + // If dash is at position 0, it's a 
negative number start + if dash_pos == 0 { + // Starts with -, could be negative number or negative range start + let rest = &item_str[1..]; + if let Some(second_dash) = rest.find('-') { + // Negative start to something: -5-10 means -5 to 10 + let start_str = &item_str[..=second_dash]; + let end_str = &rest[second_dash + 1..]; + let (start, start_padding) = parse_number(start_str, pattern)?; + let (end, end_padding) = parse_number(end_str, pattern)?; + + if start > end { + return Err(HostlistError::ReversedRange { + expression: pattern.to_string(), + start, + end, + }); + } + + max_padding = max_padding.max(start_padding).max(end_padding); + items.push(RangeItem::Range { start, end }); + } else { + // Just a negative number + let (value, padding) = parse_number(item_str, pattern)?; + max_padding = max_padding.max(padding); + items.push(RangeItem::Single(value)); + } + } else { + // Normal range: start-end + let start_str = &item_str[..dash_pos]; + let end_str = &item_str[dash_pos + 1..]; + + // Check for negative end (e.g., 5--3 is invalid, but handle gracefully) + if end_str.starts_with('-') && !end_str[1..].starts_with('-') { + // Negative end value + let (start, start_padding) = parse_number(start_str, pattern)?; + let (end, end_padding) = parse_number(end_str, pattern)?; + + if start > end { + return Err(HostlistError::ReversedRange { + expression: pattern.to_string(), + start, + end, + }); + } + + max_padding = max_padding.max(start_padding).max(end_padding); + items.push(RangeItem::Range { start, end }); + } else { + let (start, start_padding) = parse_number(start_str, pattern)?; + let (end, end_padding) = parse_number(end_str, pattern)?; + + if start > end { + return Err(HostlistError::ReversedRange { + expression: pattern.to_string(), + start, + end, + }); + } + + max_padding = max_padding.max(start_padding).max(end_padding); + items.push(RangeItem::Range { start, end }); + } + } + } else { + // Single value + let (value, padding) = parse_number(item_str, 
pattern)?; + max_padding = max_padding.max(padding); + items.push(RangeItem::Single(value)); + } + } + + if items.is_empty() { + return Err(HostlistError::EmptyBracket { + expression: pattern.to_string(), + }); + } + + Ok(RangeExpression { + items, + padding: max_padding, + }) +} + +/// Parse a number string, returning the value and padding width +fn parse_number(s: &str, pattern: &str) -> Result<(i64, usize), HostlistError> { + let s = s.trim(); + if s.is_empty() { + return Err(HostlistError::InvalidNumber { + expression: pattern.to_string(), + value: s.to_string(), + }); + } + + // Determine padding from leading zeros + let digits = if let Some(rest) = s.strip_prefix('-') { + rest + } else { + s + }; + + // Count padding (leading zeros) + let padding = if digits.len() > 1 && digits.starts_with('0') { + digits.len() + } else { + 0 + }; + + // Parse the number (includes sign if present) + let value: i64 = s.parse().map_err(|_| HostlistError::InvalidNumber { + expression: pattern.to_string(), + value: s.to_string(), + })?; + + Ok((value, padding)) +} + +/// Maximum file size for hostfile (1 MB) +const MAX_HOSTFILE_SIZE: u64 = 1024 * 1024; + +/// Maximum number of lines in a hostfile +const MAX_HOSTFILE_LINES: usize = 100_000; + +/// Parse hosts from a file (one per line) +/// +/// # Arguments +/// +/// * `path` - Path to the hostfile +/// +/// # Returns +/// +/// A vector of hostnames read from the file. 
+/// +/// # Security +/// +/// This function implements resource limits to prevent DoS attacks: +/// - Maximum file size: 1 MB +/// - Maximum number of lines: 100,000 +pub fn parse_hostfile(path: &Path) -> Result, HostlistError> { + // Check file size before reading to prevent resource exhaustion + let metadata = std::fs::metadata(path).map_err(|e| { + if e.kind() == std::io::ErrorKind::NotFound { + HostlistError::FileNotFound { + path: path.display().to_string(), + } + } else { + HostlistError::FileReadError { + path: path.display().to_string(), + reason: e.to_string(), + } + } + })?; + + let file_size = metadata.len(); + if file_size > MAX_HOSTFILE_SIZE { + return Err(HostlistError::FileReadError { + path: path.display().to_string(), + reason: format!( + "file size {} bytes exceeds maximum allowed size of {} bytes", + file_size, MAX_HOSTFILE_SIZE + ), + }); + } + + let content = std::fs::read_to_string(path).map_err(|e| HostlistError::FileReadError { + path: path.display().to_string(), + reason: e.to_string(), + })?; + + let hosts: Vec = content + .lines() + .take(MAX_HOSTFILE_LINES) + .map(|line| line.trim()) + .filter(|line| !line.is_empty() && !line.starts_with('#')) + .map(String::from) + .collect(); + + // Check if we hit the line limit + if content.lines().count() > MAX_HOSTFILE_LINES { + return Err(HostlistError::FileReadError { + path: path.display().to_string(), + reason: format!( + "file contains more than {} lines (limit exceeded)", + MAX_HOSTFILE_LINES + ), + }); + } + + Ok(hosts) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_simple_range() { + let pattern = parse_host_pattern("node[1-3]").unwrap(); + assert_eq!(pattern.segments.len(), 2); + + match &pattern.segments[0] { + PatternSegment::Literal(s) => assert_eq!(s, "node"), + _ => panic!("Expected literal"), + } + + match &pattern.segments[1] { + PatternSegment::Range(r) => { + assert_eq!(r.items.len(), 1); + assert_eq!(r.padding, 0); + match &r.items[0] { + 
RangeItem::Range { start, end } => { + assert_eq!(*start, 1); + assert_eq!(*end, 3); + } + _ => panic!("Expected range"), + } + } + _ => panic!("Expected range"), + } + } + + #[test] + fn test_parse_zero_padded_range() { + let pattern = parse_host_pattern("node[01-05]").unwrap(); + + match &pattern.segments[1] { + PatternSegment::Range(r) => { + assert_eq!(r.padding, 2); + assert_eq!(r.values(), vec![1, 2, 3, 4, 5]); + } + _ => panic!("Expected range"), + } + } + + #[test] + fn test_parse_comma_separated_values() { + let pattern = parse_host_pattern("node[1,3,5]").unwrap(); + + match &pattern.segments[1] { + PatternSegment::Range(r) => { + assert_eq!(r.items.len(), 3); + assert_eq!(r.values(), vec![1, 3, 5]); + } + _ => panic!("Expected range"), + } + } + + #[test] + fn test_parse_mixed_range() { + let pattern = parse_host_pattern("node[1-3,7,9-10]").unwrap(); + + match &pattern.segments[1] { + PatternSegment::Range(r) => { + assert_eq!(r.values(), vec![1, 2, 3, 7, 9, 10]); + } + _ => panic!("Expected range"), + } + } + + #[test] + fn test_parse_multiple_ranges() { + let pattern = parse_host_pattern("rack[1-2]-node[1-3]").unwrap(); + assert_eq!(pattern.segments.len(), 4); + assert!(pattern.has_ranges()); + assert_eq!(pattern.expansion_count(), 6); + } + + #[test] + fn test_parse_with_domain() { + let pattern = parse_host_pattern("web[1-3].example.com").unwrap(); + assert_eq!(pattern.segments.len(), 3); + + match &pattern.segments[2] { + PatternSegment::Literal(s) => assert_eq!(s, ".example.com"), + _ => panic!("Expected literal"), + } + } + + #[test] + fn test_parse_no_range() { + let pattern = parse_host_pattern("simple.host.com").unwrap(); + assert_eq!(pattern.segments.len(), 1); + assert!(!pattern.has_ranges()); + assert_eq!(pattern.expansion_count(), 1); + } + + #[test] + fn test_parse_empty_bracket_error() { + let result = parse_host_pattern("node[]"); + assert!(matches!(result, Err(HostlistError::EmptyBracket { .. 
}))); + } + + #[test] + fn test_parse_unclosed_bracket_error() { + let result = parse_host_pattern("node[1-5"); + assert!(matches!(result, Err(HostlistError::UnclosedBracket { .. }))); + } + + #[test] + fn test_parse_unmatched_bracket_error() { + let result = parse_host_pattern("node]1-5["); + assert!(matches!( + result, + Err(HostlistError::UnmatchedBracket { .. }) + )); + } + + #[test] + fn test_parse_reversed_range_error() { + let result = parse_host_pattern("node[5-1]"); + assert!(matches!(result, Err(HostlistError::ReversedRange { .. }))); + } + + #[test] + fn test_parse_invalid_number_error() { + let result = parse_host_pattern("node[a-z]"); + assert!(matches!(result, Err(HostlistError::InvalidNumber { .. }))); + } + + #[test] + fn test_parse_nested_brackets_error() { + let result = parse_host_pattern("node[[1-2]]"); + assert!(matches!(result, Err(HostlistError::NestedBrackets { .. }))); + } + + #[test] + fn test_range_expression_format_value() { + let expr = RangeExpression { + items: vec![RangeItem::Range { start: 1, end: 5 }], + padding: 3, + }; + assert_eq!(expr.format_value(1), "001"); + assert_eq!(expr.format_value(12), "012"); + assert_eq!(expr.format_value(123), "123"); + } + + #[test] + fn test_range_item_count() { + assert_eq!(RangeItem::Single(5).count(), 1); + assert_eq!(RangeItem::Range { start: 1, end: 5 }.count(), 5); + assert_eq!(RangeItem::Range { start: 0, end: 0 }.count(), 1); + } +} diff --git a/src/lib.rs b/src/lib.rs index 011174c9..8a179a51 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -17,6 +17,7 @@ pub mod commands; pub mod config; pub mod executor; pub mod forwarding; +pub mod hostlist; pub mod jump; pub mod node; pub mod pty; diff --git a/src/main.rs b/src/main.rs index 4faab113..59804814 100644 --- a/src/main.rs +++ b/src/main.rs @@ -16,6 +16,7 @@ use anyhow::Result; use bssh::cli::{ has_pdsh_compat_flag, is_pdsh_compat_mode, remove_pdsh_compat_flag, Cli, Commands, PdshCli, }; +use bssh::hostlist; use clap::Parser; use glob::Pattern; 
@@ -92,74 +93,92 @@ async fn run_pdsh_mode(args: &[String]) -> Result<()> { /// Handle pdsh query mode (-q) /// /// Shows the list of hosts that would be targeted and exits. +/// Supports hostlist expression expansion (e.g., node[1-5], rack[1-2]-node[1-3]) /// Uses the same glob pattern matching as the standard --exclude option /// for consistency. async fn handle_pdsh_query_mode(pdsh_cli: &PdshCli) -> Result<()> { if let Some(ref hosts_str) = pdsh_cli.hosts { - let hosts: Vec<&str> = hosts_str.split(',').map(|s| s.trim()).collect(); - - // Compile exclusion patterns (same logic as app/nodes.rs exclude_nodes) - let exclusion_patterns: Vec = if let Some(ref exclude_str) = pdsh_cli.exclude { - let patterns: Vec<&str> = exclude_str.split(',').map(|s| s.trim()).collect(); - let mut compiled = Vec::with_capacity(patterns.len()); - for pattern in patterns { - // Security: Validate pattern length - const MAX_PATTERN_LENGTH: usize = 256; - if pattern.len() > MAX_PATTERN_LENGTH { - anyhow::bail!( - "Exclusion pattern too long (max {MAX_PATTERN_LENGTH} characters)" - ); - } - - // Security: Skip empty patterns - if pattern.is_empty() { - continue; - } - - // Security: Prevent excessive wildcards - let wildcard_count = pattern.chars().filter(|c| *c == '*' || *c == '?').count(); - const MAX_WILDCARDS: usize = 10; - if wildcard_count > MAX_WILDCARDS { - anyhow::bail!( - "Exclusion pattern contains too many wildcards (max {MAX_WILDCARDS})" - ); - } + // Expand hostlist expressions (e.g., node[1-5], rack[1-2]-node[1-3]) + let hosts: Vec = hostlist::expand_host_specs(hosts_str) + .map_err(|e| anyhow::anyhow!("Failed to expand host expression: {e}"))?; + + // Process exclusion patterns (supports both glob patterns and hostlist expressions) + let (expanded_exclusions, glob_exclusions): (Vec, Vec) = + if let Some(ref exclude_str) = pdsh_cli.exclude { + let mut expanded = Vec::new(); + let mut globs = Vec::new(); + + for pattern in exclude_str.split(',').map(|s| s.trim()) { + // 
Security: Validate pattern length + const MAX_PATTERN_LENGTH: usize = 256; + if pattern.len() > MAX_PATTERN_LENGTH { + anyhow::bail!( + "Exclusion pattern too long (max {MAX_PATTERN_LENGTH} characters)" + ); + } - // Compile the glob pattern - match Pattern::new(pattern) { - Ok(p) => compiled.push(p), - Err(_) => { - anyhow::bail!("Invalid exclusion pattern: {pattern}"); + // Security: Skip empty patterns + if pattern.is_empty() { + continue; } - } - } - compiled - } else { - Vec::new() - }; - // Filter and display hosts - for host in hosts { - // Check if host matches any exclusion pattern - let is_excluded = if exclusion_patterns.is_empty() { - false - } else { - exclusion_patterns.iter().any(|pattern| { - // For patterns without wildcards, also do exact/contains matching - // (consistent with exclude_nodes in app/nodes.rs) - let pattern_str = pattern.as_str(); - if !pattern_str.contains('*') - && !pattern_str.contains('?') - && !pattern_str.contains('[') - { - host == pattern_str || host.contains(pattern_str) + // Check if it's a hostlist expression (contains numeric range brackets) + if hostlist::is_hostlist_expression(pattern) { + // Expand hostlist expression + let expanded_hosts = hostlist::expand_host_specs(pattern).map_err(|e| { + anyhow::anyhow!("Failed to expand exclusion pattern: {e}") + })?; + expanded.extend(expanded_hosts); } else { - pattern.matches(host) + // Security: Prevent excessive wildcards for glob patterns + let wildcard_count = + pattern.chars().filter(|c| *c == '*' || *c == '?').count(); + const MAX_WILDCARDS: usize = 10; + if wildcard_count > MAX_WILDCARDS { + anyhow::bail!( + "Exclusion pattern contains too many wildcards (max {MAX_WILDCARDS})" + ); + } + + // Compile the glob pattern + match Pattern::new(pattern) { + Ok(p) => globs.push(p), + Err(_) => { + anyhow::bail!("Invalid exclusion pattern: {pattern}"); + } + } } - }) + } + (expanded, globs) + } else { + (Vec::new(), Vec::new()) }; - if !is_excluded { + // Create a set for O(1) 
lookup of expanded exclusions + let exclusion_set: std::collections::HashSet<&str> = + expanded_exclusions.iter().map(|s| s.as_str()).collect(); + + // Filter and display hosts + for host in &hosts { + // Check if host is in the expanded exclusion set + let is_excluded_by_hostlist = exclusion_set.contains(host.as_str()); + + // Check if host matches any glob exclusion pattern + let is_excluded_by_glob = glob_exclusions.iter().any(|pattern| { + // For patterns without wildcards, also do exact/contains matching + // (consistent with exclude_nodes in app/nodes.rs) + let pattern_str = pattern.as_str(); + if !pattern_str.contains('*') + && !pattern_str.contains('?') + && !pattern_str.contains('[') + { + host == pattern_str || host.contains(pattern_str) + } else { + pattern.matches(host) + } + }); + + if !is_excluded_by_hostlist && !is_excluded_by_glob { println!("{host}"); } }