Files
k3s-ansible/scripts/verify-compute-blade-agent.sh
Michael Skrynski eb800cd4e3 Fix K3s upgrade support and add monitoring dashboards
- Remove 'when: not k3s_binary.stat.exists' condition from k3s-server and
  k3s-agent installation tasks to allow in-place upgrades of K3s versions
- Update task names to reflect both install and upgrade functionality
- Add change detection using stdout inspection for better Ansible reporting

Add InfluxDB v2 native dashboard alongside Grafana dashboard:
- Create influxdb/rpi-cluster-dashboard-v2.json for InfluxDB 2.8 compatibility
- Update Grafana dashboard datasource UID from 'influx' to 'influxdb'
- Remove unused disk usage and network traffic panels per user request

Update worker node discovery in compute-blade-agent verification script:
- Fix pattern matching to work with cm4-* node naming convention
- Add support for pi-worker and cb-0* patterns as fallbacks
- Now correctly parses [worker] section from inventory

Update inventory version documentation:
- Add comment explaining how to use 'latest' for auto-updates
- Set version to v1.35.0+k3s1 (updated from v1.34.2+k3s1)
- Add guidance on version format for users

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2026-01-08 16:28:26 +01:00

112 lines
4.2 KiB
Bash
Executable File

#!/bin/bash
#
# Compute Blade Agent Verification Script
#
# Checks the compute-blade-agent installation on the worker nodes listed in
# the inventory file.

# Abort as soon as an unguarded command fails.
set -e

# SCRIPT_DIR is the parent of the directory that contains this script; the
# inventory path is resolved relative to it.
SCRIPT_DIR="$(
  cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd
)"
INVENTORY="${SCRIPT_DIR}/inventory/hosts.ini"

# Colors for output. The escape sequences are stored as literal text and are
# only interpreted when printed (echo -e / printf %b).
NC='\033[0m' # No Color
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'

# Banner: one printf call, one argument per output line. The trailing "\n"
# on the last argument produces the blank line after the box.
printf '%b\n' \
  "${BLUE}╔════════════════════════════════════════════════════════════════╗${NC}" \
  "${BLUE}║ Compute Blade Agent Verification Script ║${NC}" \
  "${BLUE}╚════════════════════════════════════════════════════════════════╝${NC}\n"
# Parse worker nodes from inventory
#
# Only host lines inside the [worker] group are collected: scanning starts at
# the "[worker]" header and stops at the next "[...]" header, so a host that
# is listed again in a later group is not picked up a second time, and the
# group may be arbitrarily long. A host line must start with one of the known
# worker name prefixes (cm4-, pi-worker, cb-0); its first whitespace-separated
# field is taken as the host alias. Duplicate aliases are reported once.
echo -e "${YELLOW}Parsing worker nodes from inventory...${NC}"
if [ ! -r "$INVENTORY" ]; then
  echo -e "${RED}Inventory file not readable: ${INVENTORY}${NC}"
  exit 1
fi
# "|| WORKERS=" keeps an awk failure from aborting the script under set -e;
# the empty-list check below then prints the user-facing error.
WORKERS=$(awk '
  /^\[/ { in_worker = ($0 ~ /^\[worker\]/); next }
  in_worker && /^(cm4-|pi-worker|cb-0)/ && !seen[$1]++ { print $1 }
' "$INVENTORY") || WORKERS=""
if [ -z "$WORKERS" ]; then
  echo -e "${RED}No worker nodes found in inventory${NC}"
  exit 1
fi
# Show the newline-separated list as a comma-separated one.
echo -e "${GREEN}Found worker nodes: ${WORKERS//$'\n'/, }${NC}\n"
# Check each worker

# Options shared by every remote check:
#   -n              keep ssh from reading (and swallowing) this script's stdin
#   ConnectTimeout  bound the wait on hosts that do not answer
#   BatchMode       never prompt for a password; fail instead
SSH_OPTS=(-n -o ConnectTimeout=5 -o BatchMode=yes)

# Print the ansible_host value for a host alias.
#   $1 - host alias; must equal the first field of the inventory line exactly,
#        so "cm4-01" does not also match "cm4-010"
#   $2 - path to the inventory file
# The ansible_host=... variable may appear anywhere on the line. Only the
# first matching line is used. Prints nothing when no value is found.
lookup_host_ip() {
  awk -v host="$1" '
    $1 == host {
      for (i = 2; i <= NF; i++) {
        if ($i ~ /^ansible_host=/) {
          sub(/^ansible_host=/, "", $i)
          print $i
          exit
        }
      }
    }
  ' "$2"
}

# Run a command on a worker as user pi, discarding all output.
#   $1 - host address
#   $2 - command line to run remotely
# Returns the exit status of ssh: the remote command's status, or 255 when
# ssh itself failed.
remote_check() {
  ssh "${SSH_OPTS[@]}" "pi@$1" "$2" &> /dev/null
}

for worker in $WORKERS; do
  echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
  echo -e "${BLUE}Checking: ${worker}${NC}"
  echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"

  # Get host IP from inventory. A lookup failure is treated like "no IP" so
  # that set -e does not abort the whole run for one bad entry.
  HOST_IP=$(lookup_host_ip "$worker" "$INVENTORY") || HOST_IP=""
  if [ -z "$HOST_IP" ]; then
    echo -e "${RED}Could not find IP for ${worker}${NC}\n"
    continue
  fi
  echo -e "Host: ${worker} (${HOST_IP})"

  # Check connectivity
  echo -n "Network: "
  if ping -c 1 -W 2 "$HOST_IP" &> /dev/null; then
    echo -e "${GREEN}✓ Reachable${NC}"
  else
    echo -e "${RED}✗ Unreachable${NC}\n"
    continue
  fi

  # Check compute-blade-agent service. Status 255 means ssh could not run the
  # command at all, so every following remote check would give a false
  # negative; report that once and move on to the next worker.
  echo -n "Service Status: "
  service_rc=0
  remote_check "$HOST_IP" "systemctl is-active compute-blade-agent" || service_rc=$?
  if [ "$service_rc" -eq 0 ]; then
    echo -e "${GREEN}✓ Running${NC}"
  elif [ "$service_rc" -eq 255 ]; then
    echo -e "${RED}✗ Unknown (SSH connection failed)${NC}\n"
    continue
  else
    echo -e "${RED}✗ Not running${NC}"
  fi

  # Check binary
  echo -n "Binary: "
  if remote_check "$HOST_IP" "[ -f /usr/local/bin/compute-blade-agent ]"; then
    echo -e "${GREEN}✓ Installed${NC}"
    # Try to get version; fall back to "unknown" when the call fails.
    VERSION=$(ssh "${SSH_OPTS[@]}" "pi@${HOST_IP}" \
      "/usr/local/bin/compute-blade-agent --version 2>/dev/null" || echo "unknown")
    echo " Version: $VERSION"
  else
    echo -e "${RED}✗ Not found${NC}"
  fi

  # Check config (a missing config is only a warning)
  echo -n "Config: "
  if remote_check "$HOST_IP" "[ -f /etc/compute-blade-agent/config.yaml ]"; then
    echo -e "${GREEN}✓ Found${NC}"
  else
    echo -e "${YELLOW}⚠ Not found${NC}"
  fi

  # Check systemd service file
  echo -n "Systemd Unit: "
  if remote_check "$HOST_IP" "[ -f /etc/systemd/system/compute-blade-agent.service ]"; then
    echo -e "${GREEN}✓ Installed${NC}"
  else
    echo -e "${RED}✗ Not found${NC}"
  fi
  echo ""
done
# Closing summary and follow-up hints. One printf call, one argument per
# output line; an argument ending in "\n" is followed by a blank line.
printf '%b\n' \
  "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" \
  "${GREEN}Verification complete!${NC}\n" \
  "${YELLOW}To view logs on a specific node, run:${NC}" \
  " ssh pi@<host-ip>" \
  " sudo journalctl -u compute-blade-agent -f\n" \
  "${YELLOW}To restart the service:${NC}" \
  " ssh pi@<host-ip>" \
  " sudo systemctl restart compute-blade-agent\n"