Server Health Report - $(date)
Server | Status | CPU Load | Memory | Disk | Services | Updates | Security |
---|---|---|---|---|---|---|---|
$server | $status | $cpu | $mem | $disk | $services | $updates | $security |
Summary
Total Critical Issues: $TOTAL_CRITICAL
Total Warnings: $TOTAL_WARNING
HTML_FOOT } check_server_health() { local server=$1 echo -e "\nChecking ${GREEN}$server${NC}" # Test SSH connection if ! ssh -i "$SSH_KEY" -o ConnectTimeout=$SSH_TIMEOUT -o BatchMode=yes "$SSH_USER@$server" true 2>/dev/null; then add_html_row "$server" "Offline" "" "" "" "" "" echo -e "${RED} ➔ Offline${NC}" return fi # Get health data local health_data=$(ssh -i "$SSH_KEY" -T "$SSH_USER@$server" <<'EOF' { # CPU cpu_cores=$(nproc) load_avg=$(awk '{print $1,$2,$3}' /proc/loadavg) load1=$(awk '{print $1}' /proc/loadavg) # Memory mem_usage=$(free -m | awk '/Mem:/{printf "%.1f", $3/$2*100}') # Disk disk_usage=$(df -h | awk '/^\/dev/{print $1"|"$5"|"$6}' | head -3) # Services services_down=$(systemctl is-active sshd crond firewalld auditd 2>/dev/null | grep -c inactive) # Updates updates_available=$(dnf check-update -q | wc -l) # Security selinux_status=$(getenforce) firewall_status=$(firewall-cmd --state 2>&1) echo -n "{" echo -n "\"cpu_cores\":$cpu_cores," echo -n "\"load_avg\":\"$load_avg\"," echo -n "\"load1\":$load1," echo -n "\"mem_usage\":$mem_usage," echo -n "\"disk_usage\":\"$disk_usage\"," echo -n "\"services_down\":$services_down," echo -n "\"updates_available\":$updates_available," echo -n "\"selinux_status\":\"$selinux_status\"," echo -n "\"firewall_status\":\"$firewall_status\"" echo "}" } EOF ) # Parse JSON data local cpu_cores=$(jq -r '.cpu_cores' <<< "$health_data") local load_avg=$(jq -r '.load_avg' <<< "$health_data") local load1=$(jq -r '.load1' <<< "$health_data") local mem_usage=$(jq -r '.mem_usage' <<< "$health_data") local disk_usage=$(jq -r '.disk_usage' <<< "$health_data") local services_down=$(jq -r '.services_down' <<< "$health_data") local updates_available=$(jq -r '.updates_available' <<< "$health_data") local selinux_status=$(jq -r '.selinux_status' <<< "$health_data") local firewall_status=$(jq -r '.firewall_status' <<< "$health_data") # Analyze metrics local status="OK" local critical=0 local warning=0 # CPU Analysis local load_pct=$(awk -v cores="$cpu_cores" -v load="$load1" 'BEGIN {printf "%.0f", (load/cores)*100}') local cpu_status="Normal" if [ $load_pct -ge $CPU_CRITICAL ]; then cpu_status="CRITICAL ($load_pct%)" ((critical++)) elif [ $load_pct -ge $CPU_WARNING ]; then cpu_status="WARNING ($load_pct%)" ((warning++)) else cpu_status="$load_avg" fi # Memory Analysis local mem_status="Normal" if [ $(echo "$mem_usage >= $MEM_CRITICAL" | bc) -eq 1 ]; then mem_status="CRITICAL (${mem_usage}%)" ((critical++)) elif [ $(echo "$mem_usage >= $MEM_WARNING" | bc) -eq 1 ]; then mem_status="WARNING (${mem_usage}%)" ((warning++)) else mem_status="${mem_usage}%" fi # Disk Analysis local disk_status="" while IFS='|' read -r device usage mount; do usage=${usage%\%} if [ $usage -ge $DISK_CRITICAL ]; then disk_status+="$device@$mount: ${usage}%" ((critical++)) elif [ $usage -ge $DISK_WARNING ]; then disk_status+="$device@$mount: ${usage}%
" ((warning++)) else disk_status+="$device@$mount: ${usage}%
" fi done <<< "$disk_usage" # Service Analysis local service_status="All services up" if [ $services_down -gt 0 ]; then service_status="$services_down services down" ((critical++)) fi # Update Analysis local update_status="Up to date" if [ $updates_available -gt 0 ]; then update_status="$updates_available updates" ((warning++)) fi # Security Analysis local security_issues=() local security_status="Secure" if [ "$selinux_status" != "Enforcing" ]; then security_issues+=("SELinux: $selinux_status") fi if [ "$firewall_status" != "running" ]; then security_issues+=("Firewall: $firewall_status") fi if [ ${#security_issues[@]} -gt 0 ]; then security_status="$(IFS='
'; echo "${security_issues[*]}")" ((critical++)) fi # Update global counters TOTAL_CRITICAL=$((TOTAL_CRITICAL + critical)) TOTAL_WARNING=$((TOTAL_WARNING + warning)) # Determine overall status if [ $critical -gt 0 ]; then status="CRITICAL" elif [ $warning -gt 0 ]; then status="WARNING" else status="OK" fi add_html_row "$server" "$status" "$cpu_status" "$mem_status" "$disk_status" "$service_status" "$update_status" "$security_status" } send_email() { if [ $EMAIL_ENABLED -eq 0 ]; then return fi if ! command -v mailx &> /dev/null; then echo -e "${YELLOW}mailx not installed. Email report disabled.${NC}" return fi echo -e "\n${YELLOW}Sending email report to $EMAIL_RECIPIENT...${NC}" ( echo "From: $EMAIL_SENDER" echo "To: $EMAIL_RECIPIENT" echo "Subject: $EMAIL_SUBJECT" echo "MIME-Version: 1.0" echo "Content-Type: text/html; charset=UTF-8" echo cat "$HTML_REPORT" ) | mailx -t if [ $? -eq 0 ]; then echo -e "${GREEN}Email sent successfully!${NC}" else echo -e "${RED}Failed to send email!${NC}" fi } # Main execution validate_environment initialize_html while IFS= read -r server; do [[ -z "$server" || "$server" == \#* ]] && continue check_server_health "$server" done < "$INPUT_FILE" finalize_html send_email echo -e "\n${GREEN}Report generated: ${HTML_REPORT}${NC}"