diff --git a/Cargo.toml b/Cargo.toml index 209ec251..f3807c64 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "bssh" -version = "0.2.0" +version = "0.3.0" edition = "2021" authors = ["Jeongkyu Shin"] description = "Parallel SSH command execution tool for cluster management" @@ -22,6 +22,7 @@ indicatif = "0.18" rpassword = "7" directories = "5" dirs = "6.0" +chrono = "0.4" [dev-dependencies] tempfile = "3" diff --git a/README.md b/README.md index 5a52d3df..10df7e84 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@ A high-performance parallel SSH command execution tool for cluster management, b - **Flexible Authentication**: Support for SSH keys and SSH agent - **Host Key Verification**: Secure host key checking with known_hosts support - **Cross-Platform**: Works on Linux and macOS +- **Output Management**: Save command outputs to files per node with detailed logging ## Installation @@ -158,6 +159,52 @@ bssh -c webservers "sudo systemctl restart nginx" bssh -c production --output-dir ./logs "tail -n 100 /var/log/syslog" ``` +## Output File Management + +When using the `--output-dir` option, bssh saves command outputs to structured files: + +### File Structure +``` +output-dir/ +├── hostname1_20250821_143022.stdout # Standard output +├── hostname1_20250821_143022.stderr # Standard error (if any) +├── hostname2_20250821_143022.stdout # Per-node outputs +├── hostname2_20250821_143022.error # Connection/execution errors +├── hostname3_20250821_143022.empty # Marker for no output +└── summary_20250821_143022.txt # Overall execution summary +``` + +### File Types +- **`.stdout`**: Contains standard output from successful commands +- **`.stderr`**: Contains standard error output (created only if stderr is not empty) +- **`.error`**: Contains error messages for failed connections or executions +- **`.empty`**: Marker file when command produces no output +- **`summary_*.txt`**: Overall execution summary with success/failure counts + +### File Headers +Each output file includes metadata headers: +``` +# Command: df -h +# Host: server1.example.com +# User: admin +# Exit Status: 0 +# Timestamp: 20250821_143022 + +[actual command output follows] +``` + +### Example Usage +```bash +# Save outputs to timestamped directory +bssh -c production --output-dir ./results/$(date +%Y%m%d) "ps aux | head -10" + +# Collect system information +bssh -c all-servers --output-dir ./system-info "uname -a; df -h; free -m" + +# Debug failed services +bssh -c webservers --output-dir ./debug "systemctl status nginx" +``` + ## Development ### Building diff --git a/docs/man/bssh.1 b/docs/man/bssh.1 index 33f95641..84d1c1a3 100644 --- a/docs/man/bssh.1 +++ b/docs/man/bssh.1 @@ -1,6 +1,6 @@ .\" Manpage for bssh .\" Contact the maintainers to correct errors or typos. -.TH BSSH 1 "August 21, 2025" "v0.2.0" "bssh Manual" +.TH BSSH 1 "August 21, 2025" "v0.3.0" "bssh Manual" .SH NAME bssh \- Backend.AI SSH - Parallel command execution across cluster nodes @@ -50,7 +50,8 @@ Maximum parallel connections (default: 10) .TP .BR \-\-output\-dir " " \fIOUTPUT_DIR\fR -Output directory for command results +Output directory for command results. When specified, saves command outputs +to separate files for each node with stdout, stderr, and execution summary .TP .BR \-v ", " \-\-verbose @@ -180,6 +181,17 @@ Use SSH agent for authentication: .TP Save output to files: .B bssh --output-dir ./results -c production "ps aux" +.RS +Creates timestamped files per node: +.br +- hostname_TIMESTAMP.stdout (standard output) +.br +- hostname_TIMESTAMP.stderr (error output) +.br +- hostname_TIMESTAMP.error (connection errors) +.br +- summary_TIMESTAMP.txt (execution summary) +.RE .SH EXIT STATUS .TP @@ -190,6 +202,33 @@ Success - all commands executed successfully on all nodes .B 1 Failure - one or more commands failed or connection errors occurred +.SH OUTPUT FILES +When using the +.B --output-dir +option, bssh creates the following files: + +.TP +.I hostname_YYYYMMDD_HHMMSS.stdout +Standard output from successful command execution + +.TP +.I hostname_YYYYMMDD_HHMMSS.stderr +Standard error output (created only if stderr is not empty) + +.TP +.I hostname_YYYYMMDD_HHMMSS.error +Error messages for failed connections or command execution + +.TP +.I hostname_YYYYMMDD_HHMMSS.empty +Marker file when command produces no output + +.TP +.I summary_YYYYMMDD_HHMMSS.txt +Overall execution summary with success/failure counts for all nodes + +Each output file includes metadata headers with command, host, user, exit status, and timestamp information. + .SH FILES .TP .I ~/.bssh/config.yaml diff --git a/src/main.rs b/src/main.rs index f9b68106..f05c5ecc 100644 --- a/src/main.rs +++ b/src/main.rs @@ -15,6 +15,8 @@ use anyhow::{Context, Result}; use clap::Parser; use std::path::Path; +use tokio::fs; +use tokio::io::AsyncWriteExt; use tracing_subscriber::EnvFilter; use bssh::{ @@ -25,6 +27,17 @@ use bssh::{ ssh::known_hosts::StrictHostKeyChecking, }; +struct ExecuteCommandParams<'a> { + nodes: Vec, + command: &'a str, + max_parallel: usize, + key_path: Option<&'a Path>, + verbose: bool, + strict_mode: StrictHostKeyChecking, + use_agent: bool, + output_dir: Option<&'a Path>, +} + #[tokio::main] async fn main() -> Result<()> { let cli = Cli::parse(); @@ -89,16 +102,17 @@ async fn main() -> Result<()> { } _ => { // Execute command - execute_command( + let params = ExecuteCommandParams { nodes, - &command, - cli.parallel, - cli.identity.as_deref(), - cli.verbose > 0, + command: &command, + max_parallel: cli.parallel, + key_path: cli.identity.as_deref(), + verbose: cli.verbose > 0, strict_mode, - cli.use_agent, - ) - .await?; + use_agent: cli.use_agent, + output_dir: cli.output_dir.as_deref(), + }; + execute_command(params).await?; } } @@ -199,31 +213,32 @@ async fn ping_nodes( Ok(()) } -async fn execute_command( - nodes: Vec, - command: &str, - max_parallel: usize, - key_path: Option<&Path>, - verbose: bool, - strict_mode: StrictHostKeyChecking, - use_agent: bool, -) -> Result<()> { - println!("Executing command on {} nodes: {}\n", nodes.len(), command); +async fn execute_command(params: ExecuteCommandParams<'_>) -> Result<()> { + println!( + "Executing command on {} nodes: {}\n", + params.nodes.len(), + params.command + ); - let key_path = key_path.map(|p| p.to_string_lossy().to_string()); + let key_path = params.key_path.map(|p| p.to_string_lossy().to_string()); let executor = ParallelExecutor::new_with_strict_mode_and_agent( - nodes, - max_parallel, + params.nodes, + params.max_parallel, key_path, - strict_mode, - use_agent, + params.strict_mode, + params.use_agent, ); - let results = executor.execute(command).await?; + let results = executor.execute(params.command).await?; + + // Save outputs to files if output_dir is specified + if let Some(dir) = params.output_dir { + save_outputs_to_files(&results, dir, params.command).await?; + } // Print results for result in &results { - result.print_output(verbose); + result.print_output(params.verbose); } // Print summary @@ -239,6 +254,168 @@ async fn execute_command( Ok(()) } +async fn save_outputs_to_files( + results: &[bssh::executor::ExecutionResult], + output_dir: &Path, + command: &str, +) -> Result<()> { + // Create output directory if it doesn't exist + fs::create_dir_all(output_dir) + .await + .with_context(|| format!("Failed to create output directory: {output_dir:?}"))?; + + // Get timestamp for unique file naming + let timestamp = chrono::Local::now().format("%Y%m%d_%H%M%S"); + + println!("\nSaving outputs to directory: {output_dir:?}"); + + for result in results { + let node_name = result.node.host.replace([':', '/'], "_"); + + match &result.result { + Ok(cmd_result) => { + // Save stdout if not empty + if !cmd_result.output.is_empty() { + let stdout_file = output_dir.join(format!("{node_name}_{timestamp}.stdout")); + let mut file = fs::File::create(&stdout_file).await.with_context(|| { + format!("Failed to create stdout file: {stdout_file:?}") + })?; + + // Write metadata header + let header = format!( + "# Command: {}\n# Host: {}\n# User: {}\n# Exit Status: {}\n# Timestamp: {}\n\n", + command, + result.node.host, + result.node.username, + cmd_result.exit_status, + timestamp + ); + file.write_all(header.as_bytes()).await?; + file.write_all(&cmd_result.output).await?; + file.flush().await?; + + println!(" ✓ Saved stdout for {} to {:?}", result.node, stdout_file); + } + + // Save stderr if not empty + if !cmd_result.stderr.is_empty() { + let stderr_file = output_dir.join(format!("{node_name}_{timestamp}.stderr")); + let mut file = fs::File::create(&stderr_file).await.with_context(|| { + format!("Failed to create stderr file: {stderr_file:?}") + })?; + + // Write metadata header + let header = format!( + "# Command: {}\n# Host: {}\n# User: {}\n# Exit Status: {}\n# Timestamp: {}\n\n", + command, + result.node.host, + result.node.username, + cmd_result.exit_status, + timestamp + ); + file.write_all(header.as_bytes()).await?; + file.write_all(&cmd_result.stderr).await?; + file.flush().await?; + + println!(" ✓ Saved stderr for {} to {:?}", result.node, stderr_file); + } + + // If both stdout and stderr are empty, create a marker file + if cmd_result.output.is_empty() && cmd_result.stderr.is_empty() { + let empty_file = output_dir.join(format!("{node_name}_{timestamp}.empty")); + let mut file = fs::File::create(&empty_file).await.with_context(|| { + format!("Failed to create empty marker file: {empty_file:?}") + })?; + + let content = format!( + "# Command: {}\n# Host: {}\n# User: {}\n# Exit Status: {}\n# Timestamp: {}\n\nCommand produced no output.\n", + command, + result.node.host, + result.node.username, + cmd_result.exit_status, + timestamp + ); + file.write_all(content.as_bytes()).await?; + file.flush().await?; + + println!( + " ✓ Command produced no output for {} (created marker file)", + result.node + ); + } + } + Err(e) => { + // Save error to a file + let error_file = output_dir.join(format!("{node_name}_{timestamp}.error")); + let mut file = fs::File::create(&error_file) + .await + .with_context(|| format!("Failed to create error file: {error_file:?}"))?; + + let content = format!( + "# Command: {}\n# Host: {}\n# User: {}\n# Timestamp: {}\n\nError: {}\n", + command, result.node.host, result.node.username, timestamp, e + ); + file.write_all(content.as_bytes()).await?; + file.flush().await?; + + println!(" ✗ Saved error for {} to {:?}", result.node, error_file); + } + } + } + + // Create a summary file + let summary_file = output_dir.join(format!("summary_{timestamp}.txt")); + let mut file = fs::File::create(&summary_file) + .await + .with_context(|| format!("Failed to create summary file: {summary_file:?}"))?; + + let mut summary = format!( + "Command Execution Summary\n{}\n\nCommand: {}\nTimestamp: {}\nTotal Nodes: {}\n\n", + "=".repeat(60), + command, + timestamp, + results.len() + ); + + summary.push_str("Node Results:\n"); + summary.push_str("-".repeat(40).as_str()); + summary.push('\n'); + + for result in results { + match &result.result { + Ok(cmd_result) => { + summary.push_str(&format!( + " {} - Exit Status: {} {}\n", + result.node, + cmd_result.exit_status, + if cmd_result.is_success() { + "✓" + } else { + "✗" + } + )); + } + Err(e) => { + summary.push_str(&format!(" {} - Error: {}\n", result.node, e)); + } + } + } + + let success_count = results.iter().filter(|r| r.is_success()).count(); + let failed_count = results.len() - success_count; + + summary.push_str(&format!( + "\nSummary: {success_count} successful, {failed_count} failed\n" + )); + + file.write_all(summary.as_bytes()).await?; + file.flush().await?; + + println!(" ✓ Saved execution summary to {summary_file:?}"); + + Ok(()) +} + async fn copy_file( nodes: Vec, source: &Path, diff --git a/src/ssh/pool.rs b/src/ssh/pool.rs index 861fbd3c..590ad3ef 100644 --- a/src/ssh/pool.rs +++ b/src/ssh/pool.rs @@ -37,20 +37,23 @@ struct ConnectionKey { } /// Connection pool for SSH connections. -/// +/// /// Currently a placeholder implementation due to async-ssh2-tokio limitations. /// Always creates new connections regardless of the `enabled` flag. pub struct ConnectionPool { /// Placeholder for future connection storage _connections: Arc>>, + #[allow(dead_code)] ttl: Duration, + #[allow(dead_code)] enabled: bool, + #[allow(dead_code)] max_connections: usize, } impl ConnectionPool { /// Create a new connection pool. - /// + /// /// Note: Pooling is not actually implemented due to library limitations. pub fn new(ttl: Duration, max_connections: usize, enabled: bool) -> Self { Self { @@ -60,11 +63,11 @@ impl ConnectionPool { max_connections, } } - + pub fn disabled() -> Self { Self::new(Duration::from_secs(0), 0, false) } - + pub fn with_defaults() -> Self { Self::new( Duration::from_secs(300), // 5 minutes TTL @@ -72,9 +75,9 @@ impl ConnectionPool { false, // disabled by default ) } - + /// Get or create a connection. - /// + /// /// Currently always creates a new connection due to async-ssh2-tokio limitations. /// The Client type doesn't support cloning or connection reuse. pub async fn get_or_create( @@ -92,7 +95,7 @@ impl ConnectionPool { port, user: user.to_string(), }; - + if self.enabled { trace!("Connection pooling enabled (placeholder mode)"); // In the future, we would check for existing connections here @@ -100,62 +103,56 @@ impl ConnectionPool { } else { trace!("Connection pooling disabled"); } - + // Always create new connection (pooling not possible with current library) debug!("Creating new SSH connection to {}@{}:{}", user, host, port); create_fn().await } - + /// Return a connection to the pool. - /// + /// /// Currently a no-op due to connection reuse limitations. - pub async fn return_connection( - &self, - _host: &str, - _port: u16, - _user: &str, - _client: Client, - ) { + pub async fn return_connection(&self, _host: &str, _port: u16, _user: &str, _client: Client) { // No-op: Client cannot be reused if self.enabled { trace!("Connection return requested (no-op in placeholder mode)"); } } - + /// Clean up expired connections. - /// + /// /// Currently a no-op. pub async fn cleanup_expired(&self) { if self.enabled { trace!("Cleanup requested (no-op in placeholder mode)"); } } - + /// Clear all connections from the pool. - /// + /// /// Currently a no-op. pub async fn clear(&self) { if self.enabled { trace!("Clear requested (no-op in placeholder mode)"); } } - + /// Get the number of pooled connections. - /// + /// /// Always returns 0 in the current implementation. pub async fn size(&self) -> usize { 0 // No actual pooling } - + pub fn is_enabled(&self) -> bool { self.enabled } - + pub fn enable(&mut self) { self.enabled = true; debug!("Connection pooling enabled"); } - + pub fn disable(&mut self) { self.enabled = false; debug!("Connection pooling disabled"); @@ -171,31 +168,31 @@ impl Default for ConnectionPool { #[cfg(test)] mod tests { use super::*; - + #[tokio::test] async fn test_pool_disabled_by_default() { let pool = ConnectionPool::with_defaults(); assert!(!pool.is_enabled()); assert_eq!(pool.size().await, 0); } - + #[tokio::test] async fn test_pool_cleanup() { let pool = ConnectionPool::new(Duration::from_millis(100), 10, true); - + // Pool starts empty assert_eq!(pool.size().await, 0); - + // Cleanup should work even on empty pool pool.cleanup_expired().await; assert_eq!(pool.size().await, 0); } - + #[tokio::test] async fn test_pool_clear() { let pool = ConnectionPool::new(Duration::from_secs(60), 10, true); - + pool.clear().await; assert_eq!(pool.size().await, 0); } -} \ No newline at end of file +}