Files
compute-blade-agent/internal/agent/handler.go
Cedric Kienzler 781ded8e43 feat(bladectl)!: add more bladectl commands (#91)
This PR introduces a comprehensive set of new subcommands to bladectl, expanding its capabilities for querying and managing compute blade state. It also includes an internal refactor to simplify interface management across the gRPC API.

* `get`
	* `fan`: Returns current fan speed.
	* `identify`: Indicates whether the identify mode is active.
	* `stealth`: Shows if stealth mode is currently enabled.
	* `status`: Prints a full blade status report.
	* `temperature`: Retrieves current SoC temperature.
	* `critical`: Shows whether critical mode is active.
	* `power`: Reports the current power source (e.g., PoE+ or USB).
* `set`
	* `stealth`: Enables stealth mode.
* `remove`
	* `stealth`: Disables stealth mode.
* `describe`
	* `fan`: Outputs the current fan curve configuration.
* `monitor`: Plots charts of the compute-blade-agent's state.

* **gRPC API refactor**: The gRPC service definitions previously located in `internal/api` have been folded into `internal/agent`. This eliminates redundant interface declarations and ensures that all ComputeBladeAgent implementations are directly compatible with the gRPC API.
This reduces duplication and improves long-term maintainability and clarity of the interface contract.

```bash
bladectl set fan --percent 90 --blade 1 --blade 2
bladectl unset identify --blade 1 --blade 2 --blade 3 --blade 4
bladectl set stealth --blade 1 --blade 2 --blade 3 --blade 4
bladectl get status --blade 1 --blade 2 --blade 3 --blade 4
┌───────┬─────────────┬────────────────────┬───────────────┬──────────────┬──────────┬───────────────┬──────────────┐
│ BLADE │ TEMPERATURE │ FAN SPEED OVERRIDE │ FAN SPEED     │ STEALTH MODE │ IDENTIFY │ CRITICAL MODE │ POWER STATUS │
├───────┼─────────────┼────────────────────┼───────────────┼──────────────┼──────────┼───────────────┼──────────────┤
│ 1     │ 50°C        │ 90%                │ 5825 RPM(90%) │ Active       │ Off      │ Off           │ poe+         │
│ 2     │ 48°C        │ 90%                │ 5825 RPM(90%) │ Active       │ Off      │ Off           │ poe+         │
│ 3     │ 49°C        │ Not set            │ 4643 RPM(56%) │ Active       │ Off      │ Off           │ poe+         │
│ 4     │ 49°C        │ Not set            │ 4774 RPM(58%) │ Active       │ Off      │ Off           │ poe+         │
└───────┴─────────────┴────────────────────┴───────────────┴──────────────┴──────────┴───────────────┴──────────────┘
bladectl rm stealth --blade 1 --blade 2 --blade 3 --blade 4
bladectl rm fan --blade 1 --blade 2 --blade 3 --blade 4
bladectl get status --blade 1 --blade 2 --blade 3 --blade 4
┌───────┬─────────────┬────────────────────┬───────────────┬──────────────┬──────────┬───────────────┬──────────────┐
│ BLADE │ TEMPERATURE │ FAN SPEED OVERRIDE │ FAN SPEED     │ STEALTH MODE │ IDENTIFY │ CRITICAL MODE │ POWER STATUS │
├───────┼─────────────┼────────────────────┼───────────────┼──────────────┼──────────┼───────────────┼──────────────┤
│ 1     │ 51°C        │ Not set            │ 5177 RPM(66%) │ Off          │ Off      │ Off           │ poe+         │
│ 2     │ 49°C        │ Not set            │ 5177 RPM(58%) │ Off          │ Off      │ Off           │ poe+         │
│ 3     │ 50°C        │ Not set            │ 4659 RPM(60%) │ Off          │ Off      │ Off           │ poe+         │
│ 4     │ 48°C        │ Not set            │ 4659 RPM(54%) │ Off          │ Off      │ Off           │ poe+         │
└───────┴─────────────┴────────────────────┴───────────────┴──────────────┴──────────┴───────────────┴──────────────┘
```

The example above assumes that multiple compute-blades are defined in your bladeconfig:

```yaml
blades:
    - name: 1
      blade:
        server: blade-pi1:8081
        cert:
            certificate-authority-data: <redacted>
            client-certificate-data: <redacted>
            client-key-data: <redacted>
    - name: 2
      blade:
        server: blade-pi2:8081
        cert:
            certificate-authority-data: <redacted>
            client-certificate-data: <redacted>
            client-key-data: <redacted>
    - name: 3
      blade:
        server: blade-pi3:8081
        cert:
            certificate-authority-data: <redacted>
            client-certificate-data: <redacted>
            client-key-data: <redacted>
    - name: 4
      blade:
        server: blade-pi4:8081
        cert:
            certificate-authority-data: <redacted>
            client-certificate-data: <redacted>
            client-key-data: <redacted>
    - name: 4
      blade:
        server: blade-pi4:8081
        cert:
            certificate-authority-data: <redacted>
            client-certificate-data: <redacted>
            client-key-data: <redacted>
current-blade: 1
```

Fixes #4, #9 (partially), should help with #5

* test: improve unit-testing

* fix: pin github.com/warthog618/gpiod

---------

Co-authored-by: Cedric Kienzler <cedric@specht-labs.de>
2025-06-06 23:03:43 +02:00

105 lines
4.0 KiB
Go

package internal_agent
import (
"context"
"errors"
"github.com/compute-blade-community/compute-blade-agent/pkg/events"
"github.com/compute-blade-community/compute-blade-agent/pkg/fancontroller"
"github.com/compute-blade-community/compute-blade-agent/pkg/hal/led"
"github.com/compute-blade-community/compute-blade-agent/pkg/ledengine"
"github.com/compute-blade-community/compute-blade-agent/pkg/log"
"go.uber.org/zap"
)
// handleEvent records an incoming event and routes it to the matching handler.
//
// Every event is logged, counted in eventCounter under its string name and
// registered with the agent state before it is dispatched. Critical and
// identify events are forwarded to their dedicated handlers, whose error is
// returned unchanged. An edge button press is translated into a follow-up
// event and queued on a.eventChan without blocking. All other events
// (including events.NoopEvent) result in a nil error.
func (a *computeBladeAgent) handleEvent(ctx context.Context, event events.Event) error {
	eventName := event.String()

	log.FromContext(ctx).Info("Handling event", zap.String("event", eventName))
	eventCounter.WithLabelValues(eventName).Inc()

	// The state has to see the event before any handler runs.
	a.state.RegisterEvent(event)

	switch event {
	case events.CriticalEvent:
		// Blade entered the critical state
		return a.handleCriticalActive(ctx)

	case events.CriticalResetEvent:
		// Blade left the critical state
		return a.handleCriticalReset(ctx)

	case events.IdentifyEvent:
		// Identify mode requested
		return a.handleIdentifyActive(ctx)

	case events.IdentifyConfirmEvent:
		// Identify mode confirmed/cleared
		return a.handleIdentifyConfirm(ctx)

	case events.EdgeButtonEvent:
		// The edge button toggles identify mode: request it when it is not
		// active, confirm it when it is.
		followUp := events.Event(events.IdentifyEvent)
		if a.state.IdentifyActive() {
			followUp = events.Event(events.IdentifyConfirmEvent)
		}

		// Non-blocking send: if the channel cannot take the event right now,
		// drop it and account for the drop instead of stalling the handler.
		select {
		case a.eventChan <- followUp:
		default:
			log.FromContext(ctx).Warn("Edge button press event dropped due to backlog")
			droppedEventCounter.WithLabelValues(followUp.String()).Inc()
		}
		return nil

	case events.NoopEvent:
		// Nothing to do
	}

	return nil
}
// handleIdentifyActive logs that identify mode is active and switches the edge
// LED to a burst pattern built from the zero-value color and the configured
// identify color. It returns whatever error the edge LED engine reports.
func (a *computeBladeAgent) handleIdentifyActive(ctx context.Context) error {
	log.FromContext(ctx).Info("Identify active")

	identifyPattern := ledengine.NewBurstPattern(led.Color{}, a.config.IdentifyLedColor)
	return a.edgeLedEngine.SetPattern(identifyPattern)
}
// handleIdentifyConfirm logs that identify mode was confirmed/cleared and puts
// the edge LED back on a static pattern in the configured idle color.
// It returns whatever error the edge LED engine reports.
func (a *computeBladeAgent) handleIdentifyConfirm(ctx context.Context) error {
	log.FromContext(ctx).Info("Identify confirmed/cleared")

	idlePattern := ledengine.NewStaticPattern(a.config.IdleLedColor)
	return a.edgeLedEngine.SetPattern(idlePattern)
}
// handleCriticalActive reacts to the blade entering the critical state.
//
// It overrides the fan speed to 100%, turns stealth mode off so the LEDs are
// lit, and puts the top LED on a slow blink pattern in the configured critical
// color. Every step is attempted even if an earlier one fails; the errors from
// the stealth-mode and LED steps are returned joined (nil if both succeed).
func (a *computeBladeAgent) handleCriticalActive(ctx context.Context) error {
	log.FromContext(ctx).Warn("Blade in critical state, setting fan speed to 100% and turning on LEDs")

	// Force the fan to full speed.
	fullSpeed := &fancontroller.FanOverrideOpts{Percent: 100}
	a.fanController.Override(fullSpeed)

	// Leave stealth mode so the LEDs become visible.
	stealthErr := a.blade.SetStealthMode(false)

	// Signal the critical state on the top LED.
	criticalPattern := ledengine.NewSlowBlinkPattern(led.Color{}, a.config.CriticalLedColor)
	ledErr := a.topLedEngine.SetPattern(criticalPattern)

	// Report both failures together rather than aborting on the first one.
	return errors.Join(stealthErr, ledErr)
}
// handleCriticalReset reacts to the critical state being cleared by restoring
// the default hardware settings for fans and LEDs.
//
// It removes the fan speed override, sets stealth mode back to the configured
// value, and replaces the top LED pattern with a static zero-value color (off).
// As in handleCriticalActive, every step is attempted even if an earlier one
// fails, so a stealth-mode error cannot leave the top LED stuck on the critical
// pattern. The errors from the stealth-mode and LED steps are returned joined
// (nil if both succeed).
func (a *computeBladeAgent) handleCriticalReset(ctx context.Context) error {
	log.FromContext(ctx).Info("Critical state cleared, setting fan speed to default and restoring LEDs to default state")

	// Reset fan controller overrides
	a.fanController.Override(nil)

	// Restore the configured stealth mode
	setStealthModeErr := a.blade.SetStealthMode(a.config.StealthModeEnabled)

	// Set top LED off; done regardless of the stealth-mode result
	setPatternTopLedErr := a.topLedEngine.SetPattern(ledengine.NewStaticPattern(led.Color{}))

	// Combine errors, but don't stop the execution flow on the first failure
	return errors.Join(setStealthModeErr, setPatternTopLedErr)
}