- Remove 'when: not k3s_binary.stat.exists' condition from k3s-server and k3s-agent installation tasks to allow in-place upgrades of K3s versions
- Update task names to reflect both install and upgrade functionality
- Add change detection using stdout inspection for better Ansible reporting

Add InfluxDB v2 native dashboard alongside Grafana dashboard:
- Create influxdb/rpi-cluster-dashboard-v2.json for InfluxDB 2.8 compatibility
- Update Grafana dashboard datasource UID from 'influx' to 'influxdb'
- Remove unused disk usage and network traffic panels per user request

Update worker node discovery in compute-blade-agent verification script:
- Fix pattern matching to work with cm4-* node naming convention
- Add support for pi-worker and cb-0* patterns as fallbacks
- Correctly parse the [worker] section from the inventory

Update inventory version documentation:
- Add comment explaining how to use 'latest' for auto-updates
- Set version to v1.35.0+k3s1 (updated from v1.34.2+k3s1)
- Add guidance on version format for users

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
239 lines
6.0 KiB
JSON
239 lines
6.0 KiB
JSON
{
  "name": "Raspberry Pi K3s Cluster Metrics",
  "description": "System monitoring dashboard for Raspberry Pi K3s cluster with Telegraf metrics",
  "cells": [
    {
      "x": 0,
      "y": 0,
      "w": 6,
      "h": 4,
      "kind": "Gauge",
      "name": "CPU Usage - Average",
      "properties": {
        "queries": [
          {
            "text": "from(bucket: \"rpi-cluster\")\n |> range(start: -15m)\n |> filter(fn: (r) => r[\"_measurement\"] == \"cpu\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_user\")\n |> group()\n |> mean()",
            "editMode": "advanced"
          }
        ],
        "colors": [
          {
            "id": "0",
            "type": "background",
            "hex": "#00C9FF",
            "value": 0
          },
          {
            "id": "1",
            "type": "background",
            "hex": "#FFB94E",
            "value": 50
          },
          {
            "id": "2",
            "type": "background",
            "hex": "#FF3D3D",
            "value": 80
          }
        ],
        "prefix": "",
        "suffix": "%",
        "decimalPlaces": 1,
        "note": ""
      }
    },
    {
      "x": 6,
      "y": 0,
      "w": 6,
      "h": 4,
      "kind": "Gauge",
      "name": "Memory Usage - Average",
      "properties": {
        "queries": [
          {
            "text": "from(bucket: \"rpi-cluster\")\n |> range(start: -15m)\n |> filter(fn: (r) => r[\"_measurement\"] == \"mem\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> group()\n |> mean()",
            "editMode": "advanced"
          }
        ],
        "colors": [
          {
            "id": "0",
            "type": "background",
            "hex": "#00C9FF",
            "value": 0
          },
          {
            "id": "1",
            "type": "background",
            "hex": "#FFB94E",
            "value": 60
          },
          {
            "id": "2",
            "type": "background",
            "hex": "#FF3D3D",
            "value": 85
          }
        ],
        "prefix": "",
        "suffix": "%",
        "decimalPlaces": 1,
        "note": ""
      }
    },
    {
      "x": 0,
      "y": 4,
      "w": 12,
      "h": 4,
      "kind": "TimeSeries",
      "name": "CPU Usage - All Nodes",
      "properties": {
        "queries": [
          {
            "text": "from(bucket: \"rpi-cluster\")\n |> range(start: -6h)\n |> filter(fn: (r) => r[\"_measurement\"] == \"cpu\")\n |> filter(fn: (r) => r[\"_field\"] == \"usage_user\")\n |> aggregateWindow(every: 1m, fn: mean)",
            "editMode": "advanced"
          }
        ],
        "colors": [],
        "axes": {
          "x": {
            "bounds": [],
            "label": "",
            "prefix": "",
            "suffix": "",
            "base": "10",
            "scale": "linear"
          },
          "y": {
            "bounds": [],
            "label": "CPU Usage (%)",
            "prefix": "",
            "suffix": "",
            "base": "10",
            "scale": "linear"
          }
        },
        "type": "xy",
        "geom": "line",
        "note": ""
      }
    },
    {
      "x": 0,
      "y": 8,
      "w": 12,
      "h": 4,
      "kind": "TimeSeries",
      "name": "Memory Usage - All Nodes",
      "properties": {
        "queries": [
          {
            "text": "from(bucket: \"rpi-cluster\")\n |> range(start: -6h)\n |> filter(fn: (r) => r[\"_measurement\"] == \"mem\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> aggregateWindow(every: 1m, fn: mean)",
            "editMode": "advanced"
          }
        ],
        "colors": [],
        "axes": {
          "x": {
            "bounds": [],
            "label": "",
            "prefix": "",
            "suffix": "",
            "base": "10",
            "scale": "linear"
          },
          "y": {
            "bounds": [],
            "label": "Memory (%)",
            "prefix": "",
            "suffix": "",
            "base": "10",
            "scale": "linear"
          }
        },
        "type": "xy",
        "geom": "line",
        "note": ""
      }
    },
    {
      "x": 0,
      "y": 12,
      "w": 12,
      "h": 4,
      "kind": "TimeSeries",
      "name": "CPU Temperature - All Nodes",
      "properties": {
        "queries": [
          {
            "text": "from(bucket: \"rpi-cluster\")\n |> range(start: -6h)\n |> filter(fn: (r) => r[\"_measurement\"] == \"cpu_temp_thermal\")\n |> filter(fn: (r) => r[\"_field\"] == \"value\")\n |> aggregateWindow(every: 1m, fn: mean)",
            "editMode": "advanced"
          }
        ],
        "colors": [],
        "axes": {
          "x": {
            "bounds": [],
            "label": "",
            "prefix": "",
            "suffix": "",
            "base": "10",
            "scale": "linear"
          },
          "y": {
            "bounds": [],
            "label": "Temperature (°C)",
            "prefix": "",
            "suffix": "",
            "base": "10",
            "scale": "linear"
          }
        },
        "type": "xy",
        "geom": "line",
        "note": ""
      }
    },
    {
      "x": 0,
      "y": 16,
      "w": 12,
      "h": 4,
      "kind": "TimeSeries",
      "name": "System Load - All Nodes",
      "properties": {
        "queries": [
          {
            "text": "from(bucket: \"rpi-cluster\")\n |> range(start: -6h)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"load1\")\n |> aggregateWindow(every: 1m, fn: mean)",
            "editMode": "advanced"
          }
        ],
        "colors": [],
        "axes": {
          "x": {
            "bounds": [],
            "label": "",
            "prefix": "",
            "suffix": "",
            "base": "10",
            "scale": "linear"
          },
          "y": {
            "bounds": [],
            "label": "Load Average (1m)",
            "prefix": "",
            "suffix": "",
            "base": "10",
            "scale": "linear"
          }
        },
        "type": "xy",
        "geom": "line",
        "note": ""
      }
    }
  ]
}