monitor: unify conntrack SYN alerts by direction; add ipset blacklist tooling

Reviewer notes (git apply ignores text before the first diff header):
- This patch was reconstructed from a whitespace-collapsed copy. Line breaks
  and the indentation of context lines are best guesses; apply with
  "git apply --ignore-whitespace" and verify the result against the tree.
- The "index" hashes of the files changed during review (the three new
  operation scripts, cleanup_outdated_iptables.sh, conntrack_unreplied_syn.sh)
  no longer match the post-image; regenerate them if 3-way apply is needed.
- Review fixes folded in:
  * check_halfopen_connections.sh: loop no longer runs in a pipeline subshell
    (blocked_count was always lost); threshold is validated; log typo fixed.
  * block_ip.sh: input is validated before firewall state is touched; the DROP
    rule is ensured before the "already in blacklist" early exit; the result
    of "ipset add" is checked before logging success.
  * unblock_ip.sh: the result of "ipset del" is checked before logging success.
  * cleanup_outdated_iptables.sh: the loop now converges when several
    RELATED,ESTABLISHED rules exist instead of deleting and re-adding one
    rule on every iteration.
  * conntrack_unreplied_syn.sh: source_ip/target_ip are taken from the first
    src=/dst= pair (original direction) instead of the trailing reply tuple.
- NOTE(review): conntrack normally clears UNREPLIED once a reply packet is
  seen, so SYN_RECV + UNREPLIED entries (direction="inbound") may never be
  emitted; confirm on a live node before relying on the inbound alert.

diff --git a/deploy/roles/monitor/tasks/main.yml b/deploy/roles/monitor/tasks/main.yml
index f750323d..91daf5a7 100644
--- a/deploy/roles/monitor/tasks/main.yml
+++ b/deploy/roles/monitor/tasks/main.yml
@@ -424,7 +424,7 @@ routes:
 
   # Route node alarms to node-webhook (one group per unique alert/host/IP/type/severity)
   - match_re:
-      alert_type: node-(unavailable|cpu|disk|network|vcpu|unreplied-syn-source|unreplied-syn-target)
+      alert_type: node-(unavailable|cpu|disk|network|vcpu|syn-(inbound|outbound))
       severity: critical|warning|info|error
     group_by: ['alertname', 'instance', 'hostname', 'source_ip', 'target_ip', 'alert_type', 'severity', 'node_type']
     receiver: 'node-webhook'
@@ -432,7 +432,7 @@
 
   # Also send node alarms to slack-notifications with the same fine-grained grouping
   - match_re:
-      alert_type: node-(unavailable|cpu|disk|network|vcpu|unreplied-syn-source|unreplied-syn-target)
+      alert_type: node-(unavailable|cpu|disk|network|vcpu|syn-(inbound|outbound))
       severity: critical|warning|info|error
     group_by: ['alertname', 'instance', 'hostname', 'source_ip', 'target_ip', 'alert_type', 'severity', 'node_type']
     receiver: 'slack-notifications'
diff --git a/deploy/roles/monitor/templates/node-conntrack-anomaly.yml.j2 b/deploy/roles/monitor/templates/node-conntrack-anomaly.yml.j2
index fa69b75e..7e0c42fb 100644
--- a/deploy/roles/monitor/templates/node-conntrack-anomaly.yml.j2
+++ b/deploy/roles/monitor/templates/node-conntrack-anomaly.yml.j2
@@ -1,36 +1,32 @@
 groups:
   - name: node-conntrack-anomaly
     rules:
-      # Rule 1: High unreplied SYN connections detected from source IP
-      - alert: HighUnrepliedSynFromSourceIP
-        expr: sum by (source_ip, instance, hostname) (conntrack_unreplied_syn_flows) > {{ source_threshold | default(150) }}
-        for: {{ source_duration | default("2m") }}
+      - alert: NodeConntrackSynAnomaly
+        # Unified expression, automatically grouped by direction
+        expr: sum by (direction, source_ip, target_ip, instance, hostname, tcp_state) (conntrack_unreplied_syn_flows) > {{ threshold | default(150) }}
+        for: {{ duration | default("2m") }}
         labels:
-          severity: {{ source_severity | default("warning") }}
+          # Dynamically reference labels generated by the script
+          severity: {{ severity | default("warning") }}
           component: network
-          alert_type: node-unreplied-syn-source
+          # alert_type can also be dynamically generated based on direction
+          alert_type: "node-syn-{{ $labels.direction }}"
+          direction: "{{ $labels.direction }}"
+          source_ip: "{{ $labels.source_ip }}"
+          target_ip: "{{ $labels.target_ip }}"
+          tcp_state: "{{ $labels.tcp_state }}"
+          hostname: "{{ $labels.hostname }}"
         annotations:
-          summary: "High unreplied SYN connections from source IP {{ $labels.source_ip }} (instance: {{ $labels.instance }})"
+          summary: "High unreplied SYN connections ({{ $labels.direction }}): {{ $labels.source_ip }} -> {{ $labels.target_ip }}"
           description: |
+            Direction: {{ $labels.direction }}
             Source IP: {{ $labels.source_ip }}
-            Total unreplied SYN connections: {{ $value }}
-            Instance: {{ $labels.instance }}
-            Hostname: {{ $labels.hostname }}
-            Query: conntrack_unreplied_syn_flows{source_ip="{{ $labels.source_ip }}"}
-
-      # Rule 2: High unreplied SYN connections detected to target IP
-      - alert: HighUnrepliedSynToTargetIP
-        expr: sum by (target_ip, instance, hostname) (conntrack_unreplied_syn_flows) > {{ target_threshold | default(150) }}
-        for: {{ target_duration | default("2m") }}
-        labels:
-          severity: {{ target_severity | default("warning") }}
-          component: network
-          alert_type: node-unreplied-syn-target
-        annotations:
-          summary: "High unreplied SYN connections to target IP {{ $labels.target_ip }} (instance: {{ $labels.instance }})"
-          description: |
             Target IP: {{ $labels.target_ip }}
             Total unreplied SYN connections: {{ $value }}
             Instance: {{ $labels.instance }}
             Hostname: {{ $labels.hostname }}
-            Query: conntrack_unreplied_syn_flows{target_ip="{{ $labels.target_ip }}"}
+            TCP State: {{ $labels.tcp_state }}
+            Action Required:
+            - If inbound: Check if Target IP in {{ $labels.hostname }} is under SYN Flood attack.
+            - If outbound: Check if Source IP in {{ $labels.hostname }} (VM) is compromised or scanning.
+            Query: conntrack_unreplied_syn_flows{direction="{{ $labels.direction }}",source_ip="{{ $labels.source_ip }}",target_ip="{{ $labels.target_ip }}"}
diff --git a/scripts/cloudrc b/scripts/cloudrc
index f5c50e67..5d02ade7 100644
--- a/scripts/cloudrc
+++ b/scripts/cloudrc
@@ -64,7 +64,7 @@ function apply_fw()
     rule=$*
     if [ "$action" = '-I' -o "$action" = '-A' ]; then
         rule_no=""
-        [ "$chain" = "FORWARD" ] && rule_no=2
+        [ "$chain" = "FORWARD" ] && rule_no=3
         iptables -C $chain $rule 2>/dev/null || iptables $action $chain $rule_no $rule
         return
     elif [ "$action" = '-N' ]; then
diff --git a/scripts/kvm/operation/block_ip.sh b/scripts/kvm/operation/block_ip.sh
new file mode 100755
index 00000000..7cefc9ef
--- /dev/null
+++ b/scripts/kvm/operation/block_ip.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+#
+# Add an IP to the "blacklist" ipset (7200 s timeout) and make sure the FORWARD
+# chain jumps to BLACKLIST, which drops packets from blacklisted sources.
+# Usage: block_ip.sh <ip>
+
+cd `dirname $0`
+source ../../cloudrc
+
+[ $# -ne 1 ] && echo "Usage: $0 <ip>" && exit 1
+
+LOG_DIR="/opt/cloudland/log"
+LOG_FILE="$LOG_DIR/black_list.log"
+
+# Create log directory if not exists
+[ ! -d "$LOG_DIR" ] && mkdir -p "$LOG_DIR"
+
+# Log function: timestamped line to stdout and to $LOG_FILE
+log() {
+    local timestamp=$(date '+%Y-%m-%d %H:%M:%S')
+    echo "[$timestamp] $*" | tee -a "$LOG_FILE"
+}
+
+ip=$1
+IPSET_NAME="blacklist"
+CHAIN_NAME="BLACKLIST"
+
+# Validate IP format first so a bad argument never touches firewall state
+if [[ ! "$ip" =~ ^([0-9]{1,3}\.){3}[0-9]{1,3}$ ]] && \
+   [[ ! "$ip" =~ ^([0-9a-fA-F]{0,4}:){2,7}[0-9a-fA-F]{0,4}$ ]]; then
+    log "ERROR: Invalid IP address: $ip"
+    exit 1
+fi
+
+# Create ipset if not exists
+if ! ipset list "$IPSET_NAME" &>/dev/null; then
+    ipset create "$IPSET_NAME" hash:ip timeout 0
+fi
+
+# Create iptables chain if not exists
+if ! iptables -L "$CHAIN_NAME" &>/dev/null; then
+    iptables -N "$CHAIN_NAME"
+    iptables -A "$CHAIN_NAME" -j RETURN
+fi
+
+# Ensure the DROP rule exists before any early exit: the chain may have been
+# recreated while the ipset (and its members) survived
+if ! iptables -C "$CHAIN_NAME" -m set --match-set "$IPSET_NAME" src -j DROP &>/dev/null; then
+    iptables -I "$CHAIN_NAME" 1 -m set --match-set "$IPSET_NAME" src -j DROP
+fi
+
+# Ensure BLACKLIST chain is at position 2 in FORWARD chain
+check_chain_position() {
+    iptables -L FORWARD --line-numbers -n | grep -E "^[0-9]+[[:space:]]+$CHAIN_NAME" | awk '{print $1}'
+}
+
+if ! iptables -C FORWARD -j "$CHAIN_NAME" &>/dev/null; then
+    # Chain not referenced, insert at position 2
+    iptables -I FORWARD 2 -j "$CHAIN_NAME"
+else
+    # Chain exists, check if at position 2
+    pos=$(check_chain_position)
+    if [ "$pos" != "2" ]; then
+        iptables -D FORWARD -j "$CHAIN_NAME"
+        iptables -I FORWARD 2 -j "$CHAIN_NAME"
+    fi
+fi
+
+# Check if IP already in set
+if ipset test "$IPSET_NAME" "$ip" &>/dev/null; then
+    log "INFO: IP $ip already in blacklist"
+    exit 0
+fi
+
+# Report success only when ipset accepted the entry (an IPv6 address passes
+# the format check above but may be rejected by a set created without a family)
+if ! ipset add "$IPSET_NAME" "$ip" timeout 7200; then
+    log "ERROR: Failed to add $ip to blacklist"
+    exit 1
+fi
+log "ACTION: Added $ip to blacklist"
+
+log "INFO: Blacklist update completed"
diff --git a/scripts/kvm/operation/check_halfopen_connections.sh b/scripts/kvm/operation/check_halfopen_connections.sh
new file mode 100755
index 00000000..8442e490
--- /dev/null
+++ b/scripts/kvm/operation/check_halfopen_connections.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+#
+# Scan conntrack for half-open (SYN_SENT + UNREPLIED) flows and blacklist the
+# source IP of every src/dst pair whose flow count exceeds the threshold.
+# Usage: check_halfopen_connections.sh <threshold>
+
+cd `dirname $0`
+source ../../cloudrc
+
+[ $# -ne 1 ] && echo "Usage: $0 <threshold>" && exit 1
+
+LOG_DIR="/opt/cloudland/log"
+LOG_FILE="$LOG_DIR/black_list.log"
+
+# Create log directory if not exists
+[ ! -d "$LOG_DIR" ] && mkdir -p "$LOG_DIR"
+
+# Log function: timestamped line to stdout and to $LOG_FILE
+log() {
+    local timestamp=$(date '+%Y-%m-%d %H:%M:%S')
+    echo "[$timestamp] $*" | tee -a "$LOG_FILE"
+}
+
+THRESHOLD=$1
+BLOCK_SCRIPT="./block_ip.sh"
+
+# A non-numeric threshold would make every -gt comparison below error out
+if [[ ! "$THRESHOLD" =~ ^[0-9]+$ ]]; then
+    log "ERROR: Invalid threshold: $THRESHOLD"
+    exit 1
+fi
+
+# Get half-open connections, extract src/dst IPs, count and sort
+result=$(conntrack -L 2>/dev/null | grep -E 'SYN_SENT.*UNREPLIED' | awk '{print $5, $6}' | sed 's/src=//g; s/dst=//g' | sort | uniq -c | sort -rn | head -20)
+
+if [ -z "$result" ]; then
+    log "INFO: No half-open connections found"
+    exit 0
+fi
+
+# Block IPs exceeding threshold. The loop is fed by a here-string instead of a
+# pipe so it runs in this shell and blocked_count keeps its value afterwards.
+blocked_count=0
+while read -r count src dst; do
+    if [ "$count" -gt "$THRESHOLD" ]; then
+        log "CRITICAL: Blocking syn attack from src $src to dst $dst (count: $count)"
+        $BLOCK_SCRIPT "$src" && ((blocked_count++))
+    fi
+done <<< "$result"
+
+if [ "$blocked_count" -gt 0 ]; then
+    log "INFO: Issued $blocked_count block request(s) above threshold $THRESHOLD"
+fi
diff --git a/scripts/kvm/operation/cleanup_outdated_iptables.sh b/scripts/kvm/operation/cleanup_outdated_iptables.sh
index 03f526df..9ae40be9 100755
--- a/scripts/kvm/operation/cleanup_outdated_iptables.sh
+++ b/scripts/kvm/operation/cleanup_outdated_iptables.sh
@@ -18,11 +18,13 @@ done
 
 lock_file="$run_dir/iptables.lock"
 exec 200>>"$lock_file"
-iptables -I FORWARD -m state --state RELATED,ESTABLISHED -j ACCEPT
 for i in {1..10}; do
     ln=$(iptables -n -L FORWARD --line-numbers | grep 'state RELATED,ESTABLISHED' | tail -1 | cut -d' ' -f1)
     [ "$ln" = 1 ] && break
-    iptables -D FORWARD $ln
+    [ -n "$ln" ] && iptables -D FORWARD $ln
+    # Re-insert only when no matching rule is left at position 1, so duplicates shrink instead of cycling
+    first=$(iptables -n -L FORWARD --line-numbers | grep 'state RELATED,ESTABLISHED' | head -1 | cut -d' ' -f1)
+    [ "$first" = 1 ] || iptables -I FORWARD -m state --state RELATED,ESTABLISHED -j ACCEPT
 done
 iptables -N secgroup-chain && iptables -A secgroup-chain -j ACCEPT
 ln=$(iptables -n -L secgroup-chain --line-numbers | grep 'ACCEPT' | head -1 | cut -d' ' -f1)
diff --git a/scripts/kvm/operation/unblock_ip.sh b/scripts/kvm/operation/unblock_ip.sh
new file mode 100755
index 00000000..267c38b5
--- /dev/null
+++ b/scripts/kvm/operation/unblock_ip.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+#
+# Remove an IP from the "blacklist" ipset.
+# Usage: unblock_ip.sh <ip>
+
+cd `dirname $0`
+source ../../cloudrc
+
+[ $# -ne 1 ] && echo "Usage: $0 <ip>" && exit 1
+
+LOG_DIR="/opt/cloudland/log"
+LOG_FILE="$LOG_DIR/black_list.log"
+
+# Create log directory if not exists
+[ ! -d "$LOG_DIR" ] && mkdir -p "$LOG_DIR"
+
+# Log function: timestamped line to stdout and to $LOG_FILE
+log() {
+    local timestamp=$(date '+%Y-%m-%d %H:%M:%S')
+    echo "[$timestamp] $*" | tee -a "$LOG_FILE"
+}
+
+ip=$1
+IPSET_NAME="blacklist"
+
+# Check if ipset exists
+if ! ipset list "$IPSET_NAME" &>/dev/null; then
+    log "ERROR: Ipset '$IPSET_NAME' does not exist"
+    exit 1
+fi
+
+# Validate IP format
+if [[ ! "$ip" =~ ^([0-9]{1,3}\.){3}[0-9]{1,3}$ ]] && \
+   [[ ! "$ip" =~ ^([0-9a-fA-F]{0,4}:){2,7}[0-9a-fA-F]{0,4}$ ]]; then
+    log "ERROR: Invalid IP address: $ip"
+    exit 1
+fi
+
+# Check if IP in set
+if ! ipset test "$IPSET_NAME" "$ip" &>/dev/null; then
+    log "INFO: IP $ip not in blacklist"
+    exit 0
+fi
+
+# Only log the removal when ipset actually deleted the entry
+if ! ipset del "$IPSET_NAME" "$ip"; then
+    log "ERROR: Failed to remove $ip from blacklist"
+    exit 1
+fi
+log "ACTION: Removed $ip from blacklist"
+
+log "INFO: Unblock completed"
diff --git a/scripts/kvm/report_rc.sh b/scripts/kvm/report_rc.sh
index 8ae25567..22515260 100755
--- a/scripts/kvm/report_rc.sh
+++ b/scripts/kvm/report_rc.sh
@@ -138,6 +138,12 @@ function check_system_router()
     fi
 }
 
+function check_conntrack()
+{
+    [ -z "$syn_attack_threshold" ] && syn_attack_threshold=2000
+    sudo $base_dir/operation/check_halfopen_connections.sh $syn_attack_threshold
+}
+
 function sync_instance()
 {
     flag_file=$run_dir/need_to_sync
@@ -238,6 +244,7 @@ calc_resource
 sync_instance
 sync_delayed_job
 check_system_router
+check_conntrack
 #probe_arp >/dev/null 2>&1
 inst_status
 daily_job
diff --git a/scripts/monitor/conntrack_unreplied_syn.sh b/scripts/monitor/conntrack_unreplied_syn.sh
index 533b1030..ea7cb592 100755
--- a/scripts/monitor/conntrack_unreplied_syn.sh
+++ b/scripts/monitor/conntrack_unreplied_syn.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 #
 # Script to collect conntrack unreplied SYN connections and export as Prometheus metrics
-# Output format: conntrack_unreplied_syn_flows{source_ip="x.x.x.x",target_ip="y.y.y.y",proto="tcp",state="SYN_SENT"} count
+# Output format: conntrack_unreplied_syn_flows{direction="outbound|inbound",tcp_state="SYN_SENT|SYN_RECV",source_ip="x.x.x.x",target_ip="y.y.y.y",hostname="node"} count
 #
 
 # Configuration
@@ -16,32 +16,36 @@ mkdir -p "$OUTPUT_DIR"
 TMP_FILE=$(mktemp /tmp/conntrack_unreplied_syn.XXXXXX)
 trap "rm -f $TMP_FILE" EXIT
 
-# Collect conntrack data and process - emit source_ip/target_ip
+# Collect conntrack data and process - emit source_ip/target_ip with direction
 conntrack -L 2>/dev/null \
-    | grep SYN_SENT | grep UNREPLIED \
+    | awk '/(SYN_SENT|SYN_RECV).*UNREPLIED/' \
     | awk '{
-        proto=$1;  # Protocol field, e.g., tcp
-        state=$4;  # State field, e.g., SYN_SENT
+        proto=$1;
+        state=$4;
         src=""; dst="";
         # Scan all fields to extract src=... and dst=...
         for (i = 1; i <= NF; i++) {
-            if ($i ~ /^src=/) src=$i;
-            else if ($i ~ /^dst=/) dst=$i;
+            # Keep only the first src=/dst= pair (original direction); the
+            # reply tuple later on the line would otherwise overwrite it
+            if (src == "" && $i ~ /^src=/) src=$i;
+            else if (dst == "" && $i ~ /^dst=/) dst=$i;
         }
+        sub(/^src=/,"",src);
+        sub(/^dst=/,"",dst);
+
+        # Define direction based on TCP state
+        direction="unknown";
+        if (state == "SYN_SENT") direction="outbound";
+        if (state == "SYN_RECV") direction="inbound";
+
         if (src != "" && dst != "")
-            print proto, state, src, dst;
+            print proto, state, src, dst, direction;
     }' \
-    | sort | uniq -c | sort -nr | head -20 \
+    | sort | uniq -c | sort -nr | head -50 \
     | awk -v hostname="$HOSTNAME_VAL" '{
-        count=$1;
-        proto=$2;
-        state=$3;
-        src=$4;
-        dst=$5;
-        sub(/^src=/,"",src);
-        sub(/^dst=/,"",dst);
-        printf "conntrack_unreplied_syn_flows{source_ip=\"%s\",target_ip=\"%s\",proto=\"%s\",state=\"%s\",hostname=\"%s\"} %d\n",
-            src, dst, proto, state, hostname, count;
+        count=$1; proto=$2; state=$3; src=$4; dst=$5; dir=$6;
+        printf "conntrack_unreplied_syn_flows{direction=\"%s\",tcp_state=\"%s\",source_ip=\"%s\",target_ip=\"%s\",hostname=\"%s\"} %d\n",
+            dir, state, src, dst, hostname, count;
     }' > "$TMP_FILE"
 
 # Write to output file atomically - always overwrite to ensure fresh data
diff --git a/web/templates/alarms_new.tmpl b/web/templates/alarms_new.tmpl
index 3c91001d..d289cda4 100644
--- a/web/templates/alarms_new.tmpl
+++ b/web/templates/alarms_new.tmpl
@@ -100,12 +100,9 @@ document.addEventListener('DOMContentLoaded', function() {
             "for_duration": "5m"
         },
         'conntrack': {
-            "source_threshold": 150,
-            "source_duration": "2m",
-            "source_severity": "warning",
-            "target_threshold": 150,
-            "target_duration": "2m",
-            "target_severity": "warning"
+            "threshold": 150,
+            "duration": "2m",
+            "severity": "warning"
         }
     };
 