From fd7c9239b591f54f0f20478d40dcb5ae28ca3d5b Mon Sep 17 00:00:00 2001 From: Michael Skrynski Date: Mon, 12 Jan 2026 08:54:41 +0100 Subject: [PATCH] Update docs and roles for agent on all nodes - Switch compute-blade-agent deployment from workers to all nodes (control-plane and workers) - Use /usr/bin/compute-blade-agent instead of /usr/local/bin - Update verification scripts to reference /usr/bin/compute-blade-agent - Update docs to refer to all nodes across Deployment Guide, Checklist, and Getting Started - Change site.yml to install on all hosts instead of just workers - Align example commands to the all-nodes workflow --- COMPUTE_BLADE_AGENT.md | 22 ++++++------- DEPLOYMENT_CHECKLIST.md | 25 +++++++-------- GETTING_STARTED.md | 40 +++++++++++++----------- README.md | 21 +++++++------ roles/compute-blade-agent/tasks/main.yml | 22 ++++++------- scripts/verify-compute-blade-agent.sh | 4 +-- site.yml | 2 +- 7 files changed, 71 insertions(+), 65 deletions(-) diff --git a/COMPUTE_BLADE_AGENT.md b/COMPUTE_BLADE_AGENT.md index 7ed6887..83097b1 100644 --- a/COMPUTE_BLADE_AGENT.md +++ b/COMPUTE_BLADE_AGENT.md @@ -1,6 +1,6 @@ # Compute Blade Agent Deployment Guide -Quick reference for deploying and managing the Compute Blade Agent in your k3s-ansible cluster. +Quick reference for deploying and managing the Compute Blade Agent on all nodes in your k3s-ansible cluster (control-plane and worker nodes). 
## Quick Start @@ -49,28 +49,28 @@ cm4-04 ansible_host=192.168.30.104 ansible_user=pi ### Check Service Status ```bash -ssh pi@ +ssh pi@ sudo systemctl status compute-blade-agent ``` ### View Logs ```bash -ssh pi@ +ssh pi@ sudo journalctl -u compute-blade-agent -f ``` ### Check Installation ```bash -ssh pi@ -/usr/local/bin/compute-blade-agent --version +ssh pi@ +/usr/bin/compute-blade-agent --version ls -la /etc/compute-blade-agent/ ``` ## File Locations -- **Binary**: `/usr/local/bin/compute-blade-agent` +- **Binary**: `/usr/bin/compute-blade-agent` - **Config**: `/etc/compute-blade-agent/config.yaml` - **Systemd Service**: `/etc/systemd/system/compute-blade-agent.service` - **Logs**: `journalctl -u compute-blade-agent` @@ -81,7 +81,7 @@ Configure via `BLADE_` prefixed environment variables: ```bash export BLADE_CONFIG_PATH=/etc/compute-blade-agent/config.yaml -/usr/local/bin/compute-blade-agent +/usr/bin/compute-blade-agent ``` ## Monitoring @@ -146,7 +146,7 @@ sudo systemctl restart compute-blade-agent ```bash # SSH to node -ssh pi@ +ssh pi@ # Check if uninstall script exists ls -la /usr/local/bin/*compute-blade* @@ -163,14 +163,14 @@ ansible-playbook site.yml --tags compute-blade-agent ### From Single Node ```bash -ssh pi@ +ssh pi@ sudo bash /usr/local/bin/k3s-uninstall-compute-blade-agent.sh ``` -### From All Worker Nodes +### From All Nodes ```bash -ansible worker -m shell -a "bash /usr/local/bin/k3s-uninstall-compute-blade-agent.sh" --become +ansible k3s_cluster -m shell -a "bash /usr/local/bin/k3s-uninstall-compute-blade-agent.sh" --become ``` ## Features diff --git a/DEPLOYMENT_CHECKLIST.md b/DEPLOYMENT_CHECKLIST.md index cef2841..ff2f4c3 100644 --- a/DEPLOYMENT_CHECKLIST.md +++ b/DEPLOYMENT_CHECKLIST.md @@ -1,4 +1,4 @@ -# Compute Blade Agent Deployment Checklist +# K3s Cluster Deployment Checklist (with Compute Blade Agent on All Nodes) ## Pre-Deployment @@ -20,7 +20,7 @@ This will: 1. Prepare all nodes (prerequisites) 2. Install K3s server on master 3. 
Install K3s agents on workers -4. Install compute-blade-agent on workers +4. Install compute-blade-agent on all nodes (control-plane and workers) 5. Deploy test nginx application - [ ] Start full deployment @@ -43,7 +43,7 @@ ansible-playbook site.yml --tags compute-blade-agent ``` - [ ] Use on existing K3s cluster -- [ ] Deploy agent to all configured workers +- [ ] Deploy agent to all configured nodes (masters and workers) - [ ] Verify with verification script ## Post-Deployment Verification @@ -64,7 +64,7 @@ bash scripts/verify-compute-blade-agent.sh ``` - [ ] All worker nodes pass connectivity check -- [ ] Binary is installed at `/usr/local/bin/compute-blade-agent` +- [ ] Binary is installed at `/usr/bin/compute-blade-agent` - [ ] Service status shows "Running" - [ ] Config file exists at `/etc/compute-blade-agent/config.yaml` @@ -126,7 +126,7 @@ kubectl apply -f manifests/compute-blade-agent-daemonset.yaml - [ ] Check status: `sudo systemctl status compute-blade-agent` - [ ] Check logs: `sudo journalctl -u compute-blade-agent -f` -- [ ] Check if binary exists: `ls -la /usr/local/bin/compute-blade-agent` +- [ ] Check if binary exists: `ls -la /usr/bin/compute-blade-agent` - [ ] Check systemd unit: `cat /etc/systemd/system/compute-blade-agent.service` ### Installation Failed @@ -159,7 +159,7 @@ enable_compute_blade_agent=true # or false ### Per-Node Configuration -Note: cm4-02 and cm4-03 are now **master nodes**, not workers. To enable/disable compute-blade-agent on specific nodes: +Compute-blade-agent is now installed on all nodes (control-plane and workers). 
To enable/disable on specific nodes: ```ini [master] @@ -172,9 +172,8 @@ cm4-04 ansible_host=192.168.30.104 ansible_user=pi enable_compute_blade_agent=tr ``` - [ ] Per-node settings configured as needed -- [ ] Master nodes typically don't need compute-blade-agent - [ ] Saved inventory file -- [ ] Re-run playbook if changes made +- [ ] Re-run playbook if changes made: `ansible-playbook site.yml --tags compute-blade-agent` ### Agent Configuration @@ -211,13 +210,13 @@ sudo journalctl -u compute-blade-agent -f - [ ] Monitor for any issues - [ ] Press Ctrl+C to exit -### Check Service on All Workers +### Check Service on All Nodes ```bash -ansible worker -m shell -a "systemctl status compute-blade-agent" --become +ansible k3s_cluster -m shell -a "systemctl status compute-blade-agent" --become ``` -- [ ] All workers show active status +- [ ] All nodes show active status ## HA Cluster Maintenance @@ -243,8 +242,8 @@ watch kubectl get nodes ### Uninstall K3s from All Nodes ```bash -ansible all -m shell -a "bash /usr/local/bin/k3s-uninstall.sh" --become -ansible worker -m shell -a "bash /usr/local/bin/k3s-agent-uninstall.sh" --become +ansible master -m shell -a "bash /usr/local/bin/k3s-uninstall.sh" --become +ansible worker -m shell -a "bash /usr/local/bin/k3s-agent-uninstall.sh" --become ``` - [ ] All K3s services stopped diff --git a/GETTING_STARTED.md b/GETTING_STARTED.md index 844241b..69ef4b0 100644 --- a/GETTING_STARTED.md +++ b/GETTING_STARTED.md @@ -41,7 +41,7 @@ This will: 1. Prepare all nodes (10-15 minutes) 2. Install K3s server on master (5 minutes) 3. Install K3s agents on workers (5 minutes) -4. Install compute-blade-agent on workers (2-3 minutes per node) +4. Install compute-blade-agent on all nodes (2-3 minutes per node) 5. 
Deploy test application (1 minute) **Total time**: ~30-45 minutes @@ -67,7 +67,7 @@ cm4-04 Ready 3m v1.35.0+k3s1 ### Enable/Disable Agent -To enable agent on all workers (default): +To enable agent on all nodes (default): ```ini [k3s_cluster:vars] @@ -83,10 +83,13 @@ enable_compute_blade_agent=false To enable/disable on specific nodes: ```ini +[master] +cm4-01 ansible_host=192.168.30.101 ansible_user=pi k3s_server_init=true enable_compute_blade_agent=true +cm4-02 ansible_host=192.168.30.102 ansible_user=pi k3s_server_init=false enable_compute_blade_agent=false +cm4-03 ansible_host=192.168.30.103 ansible_user=pi k3s_server_init=false enable_compute_blade_agent=false + [worker] -cm4-02 ansible_host=192.168.30.102 ansible_user=pi enable_compute_blade_agent=true -cm4-03 ansible_host=192.168.30.103 ansible_user=pi enable_compute_blade_agent=false -cm4-04 ansible_host=192.168.30.104 ansible_user=pi +cm4-04 ansible_host=192.168.30.104 ansible_user=pi enable_compute_blade_agent=true ``` ## Deployment Options @@ -97,7 +100,7 @@ cm4-04 ansible_host=192.168.30.104 ansible_user=pi ansible-playbook site.yml ``` -Deploys K3s + compute-blade-agent + test application +Deploys K3s + compute-blade-agent on all nodes + test application ### Option 2: Skip Test Application (Faster) @@ -113,7 +116,7 @@ Useful if cluster already has applications ansible-playbook site.yml --tags compute-blade-agent ``` -Deploy agent to existing K3s cluster +Deploy agent to existing K3s cluster (all nodes) ### Option 4: Skip Agent @@ -131,15 +134,15 @@ Deploy K3s without agent # From control machine bash scripts/verify-compute-blade-agent.sh -# On a worker node -ssh pi@192.168.30.102 +# On any node +ssh pi@192.168.30.101 sudo systemctl status compute-blade-agent ``` ### View Logs ```bash -ssh pi@192.168.30.102 +ssh pi@192.168.30.101 sudo journalctl -u compute-blade-agent -f ``` @@ -148,15 +151,15 @@ Press `Ctrl+C` to exit logs. 
### Check Binary ```bash -ssh pi@192.168.30.102 -/usr/local/bin/compute-blade-agent --version +ssh pi@192.168.30.101 +/usr/bin/compute-blade-agent --version ``` ## What Was Installed -### On Each Worker Node +### On Each Node (Control-plane and Workers) -- **Binary**: `/usr/local/bin/compute-blade-agent` +- **Binary**: `/usr/bin/compute-blade-agent` - **CLI Tool**: `/usr/local/bin/bladectl` - **Config**: `/etc/compute-blade-agent/config.yaml` - **Service**: `compute-blade-agent.service` (auto-start) @@ -172,9 +175,10 @@ ssh pi@192.168.30.102 ## Troubleshooting ### Service Not Running +### Check Service ```bash -ssh pi@192.168.30.102 +ssh pi@192.168.30.101 sudo systemctl status compute-blade-agent sudo journalctl -u compute-blade-agent -n 50 ``` @@ -189,7 +193,7 @@ ansible-playbook site.yml --tags compute-blade-agent ```bash ssh pi@192.168.30.102 -ls -la /usr/local/bin/compute-blade-agent +ls -la /usr/bin/compute-blade-agent ls -la /etc/compute-blade-agent/ sudo systemctl status compute-blade-agent ``` @@ -244,7 +248,7 @@ ssh pi@192.168.30.103 ### Deploy Only to Specific Nodes ```bash -ansible-playbook site.yml --tags compute-blade-agent --limit cm4-04 +ansible-playbook site.yml --tags compute-blade-agent --limit cm4-01 ``` ### Disable Agent for Next Deployment @@ -262,7 +266,7 @@ ansible-playbook site.yml --tags compute-blade-agent ### Uninstall Agent (All Workers) ```bash -ansible worker -m shell -a "bash /usr/local/bin/k3s-uninstall-compute-blade-agent.sh" --become +ansible k3s_cluster -m shell -a "bash /usr/local/bin/k3s-uninstall-compute-blade-agent.sh" --become ``` ### Uninstall K3s (All Nodes) diff --git a/README.md b/README.md index c4a0176..bd07732 100644 --- a/README.md +++ b/README.md @@ -898,17 +898,20 @@ The playbook includes automatic deployment of the Compute Blade Agent, a system The compute-blade-agent deployment is controlled by the `enable_compute_blade_agent` variable in `inventory/hosts.ini`: ```ini -# Enable/disable compute-blade-agent on all 
worker nodes +# Enable/disable compute-blade-agent on all nodes (control-plane and workers) enable_compute_blade_agent=true ``` To disable on specific nodes, add an override: ```ini +[master] +cm4-01 ansible_host=192.168.30.101 ansible_user=pi k3s_server_init=true enable_compute_blade_agent=true +cm4-02 ansible_host=192.168.30.102 ansible_user=pi k3s_server_init=false enable_compute_blade_agent=false +cm4-03 ansible_host=192.168.30.103 ansible_user=pi k3s_server_init=false enable_compute_blade_agent=false + [worker] -cm4-02 ansible_host=192.168.30.102 ansible_user=pi enable_compute_blade_agent=false -cm4-03 ansible_host=192.168.30.103 ansible_user=pi -cm4-04 ansible_host=192.168.30.104 ansible_user=pi +cm4-04 ansible_host=192.168.30.104 ansible_user=pi enable_compute_blade_agent=true ``` ### Deployment @@ -919,7 +922,7 @@ The compute-blade-agent is automatically deployed as part of the main playbook: ansible-playbook site.yml ``` -Or deploy only the compute-blade-agent on worker nodes: +Or deploy only the compute-blade-agent on all nodes: ```bash ansible-playbook site.yml --tags compute-blade-agent @@ -927,11 +930,11 @@ ansible-playbook site.yml --tags compute-blade-agent ### Verification -Check the agent status on a worker node: +Check the agent status on any node: ```bash -# SSH into a worker node -ssh pi@192.168.30.102 +# SSH into any node +ssh pi@192.168.30.101 # Check service status sudo systemctl status compute-blade-agent @@ -940,7 +943,7 @@ sudo systemctl status compute-blade-agent sudo journalctl -u compute-blade-agent -f # Check binary installation -/usr/local/bin/compute-blade-agent --version +/usr/bin/compute-blade-agent --version ``` ### Configuration Files diff --git a/roles/compute-blade-agent/tasks/main.yml b/roles/compute-blade-agent/tasks/main.yml index cdbdb6a..7c5a2f0 100644 --- a/roles/compute-blade-agent/tasks/main.yml +++ b/roles/compute-blade-agent/tasks/main.yml @@ -1,21 +1,21 @@ --- - name: Skip compute-blade-agent installation if disabled 
debug: - msg: "compute-blade-agent installation is disabled for this node" + msg: 'compute-blade-agent installation is disabled for this node' when: not enable_compute_blade_agent | bool - name: Block for compute-blade-agent installation block: - name: Check if compute-blade-agent is already installed stat: - path: /usr/local/bin/compute-blade-agent + path: /usr/bin/compute-blade-agent register: agent_binary - name: Download compute-blade-agent installer shell: curl -L -o /tmp/compute-blade-agent-installer.sh https://raw.githubusercontent.com/compute-blade-community/compute-blade-agent/main/hack/autoinstall.sh when: not agent_binary.stat.exists environment: - PATH: "{{ ansible_env.PATH }}" + PATH: '{{ ansible_env.PATH }}' - name: Make installer executable file: @@ -27,7 +27,7 @@ shell: /tmp/compute-blade-agent-installer.sh when: not agent_binary.stat.exists environment: - PATH: "{{ ansible_env.PATH }}" + PATH: '{{ ansible_env.PATH }}' - name: Wait for compute-blade-agent service to be available systemd: @@ -48,7 +48,7 @@ - name: Display compute-blade-agent service status debug: - msg: "compute-blade-agent service is {{ agent_service.status.ActiveState }}" + msg: 'compute-blade-agent service is {{ agent_service.status.ActiveState }}' - name: Check if compute-blade-agent config exists stat: @@ -57,20 +57,20 @@ - name: Display config location if it exists debug: - msg: "compute-blade-agent config found at /etc/compute-blade-agent/config.yaml" + msg: 'compute-blade-agent config found at /etc/compute-blade-agent/config.yaml' when: agent_config.stat.exists - name: Check compute-blade-agent binary version - shell: /usr/local/bin/compute-blade-agent --version 2>/dev/null || echo "Version info not available" + shell: /usr/bin/compute-blade-agent --version 2>/dev/null || echo "Version info not available" register: agent_version changed_when: false - name: Display compute-blade-agent installation result debug: msg: - - "compute-blade-agent has been successfully installed and 
started" - - "Binary location: /usr/local/bin/compute-blade-agent" - - "Config location: /etc/compute-blade-agent/config.yaml" - - "Service status: {{ agent_service.status.ActiveState }}" + - 'compute-blade-agent has been successfully installed and started' + - 'Binary location: /usr/bin/compute-blade-agent' + - 'Config location: /etc/compute-blade-agent/config.yaml' + - 'Service status: {{ agent_service.status.ActiveState }}' when: enable_compute_blade_agent | bool diff --git a/scripts/verify-compute-blade-agent.sh b/scripts/verify-compute-blade-agent.sh index 1627954..a951fe8 100755 --- a/scripts/verify-compute-blade-agent.sh +++ b/scripts/verify-compute-blade-agent.sh @@ -67,12 +67,12 @@ for worker in $WORKERS; do # Check binary echo -n "Binary: " if ssh -o ConnectTimeout=5 -o BatchMode=yes "pi@${HOST_IP}" \ - "[ -f /usr/local/bin/compute-blade-agent ]" &> /dev/null; then + "[ -f /usr/bin/compute-blade-agent ]" &> /dev/null; then echo -e "${GREEN}✓ Installed${NC}" # Try to get version VERSION=$(ssh -o ConnectTimeout=5 -o BatchMode=yes "pi@${HOST_IP}" \ - "/usr/local/bin/compute-blade-agent --version 2>/dev/null" || echo "unknown") + "/usr/bin/compute-blade-agent --version 2>/dev/null" || echo "unknown") echo " Version: $VERSION" else echo -e "${RED}✗ Not found${NC}" diff --git a/site.yml b/site.yml index 1d25373..d61ed3f 100644 --- a/site.yml +++ b/site.yml @@ -41,7 +41,7 @@ - worker - name: Install compute-blade-agent on workers - hosts: worker + hosts: all become: true roles: - role: compute-blade-agent