Merge pull request #14 from Uptime-Lab/feat/add-fancontroller

feat/fix: add linear fan speed control based on temperature
This commit is contained in:
Matthias Riegler
2023-09-04 20:00:11 +02:00
committed by GitHub
9 changed files with 388 additions and 38 deletions

View File

@@ -9,6 +9,10 @@ run:
lint:
golangci-lint run
.PHONY: test
test:
go test ./... -v
.PHONY: generate
generate: buf
$(BUF) generate
@@ -16,6 +20,9 @@ generate: buf
release:
goreleaser release --clean
snapshot:
goreleaser release --snapshot --skip-publish --clean
# Dependencies
LOCALBIN ?= $(shell pwd)/bin
$(LOCALBIN):

View File

@@ -5,6 +5,8 @@ import (
"fmt"
"net"
"net/http"
"net/http/pprof"
_ "net/http/pprof"
"os"
"os/signal"
"sync"
@@ -14,6 +16,7 @@ import (
"github.com/prometheus/client_golang/prometheus/promhttp"
bladeapiv1alpha1 "github.com/xvzf/computeblade-agent/api/bladeapi/v1alpha1"
"github.com/xvzf/computeblade-agent/internal/agent"
"github.com/xvzf/computeblade-agent/pkg/fancontroller"
"github.com/xvzf/computeblade-agent/pkg/ledengine"
"github.com/xvzf/computeblade-agent/pkg/log"
"go.uber.org/zap"
@@ -24,7 +27,6 @@ func main() {
var wg sync.WaitGroup
// setup logger
zapLogger := zap.Must(zap.NewDevelopment()).With(zap.String("app", "computeblade-agent"))
_ = zap.ReplaceGlobals(zapLogger.With(zap.String("scope", "global")))
baseCtx := log.IntoContext(context.Background(), zapLogger)
@@ -33,11 +35,17 @@ func main() {
defer cancelCtx(context.Canceled)
computebladeAgent, err := agent.NewComputeBladeAgent(agent.ComputeBladeAgentConfig{
IdleLedColor: ledengine.LedColorGreen(0.05),
IdentifyLedColor: ledengine.LedColorPurple(0.05),
CriticalLedColor: ledengine.LedColorRed(0.3),
StealthModeEnabled: false,
DefaultFanSpeed: 40,
IdleLedColor: ledengine.LedColorGreen(0.05),
IdentifyLedColor: ledengine.LedColorPurple(0.05),
CriticalLedColor: ledengine.LedColorRed(0.3),
StealthModeEnabled: false,
FanControllerConfig: fancontroller.FanControllerConfig{
Steps: []fancontroller.FanControllerStep{
{Temperature: 40, Speed: 40},
{Temperature: 55, Speed: 80},
},
},
FanUpdateInterval: 5 * time.Second,
CriticalTemperature: 60,
})
if err != nil {
@@ -100,15 +108,20 @@ func main() {
}()
// setup prometheus endpoint
promHandler := http.NewServeMux()
promHandler.Handle("/metrics", promhttp.Handler())
server := &http.Server{Addr: ":9666", Handler: promHandler}
instrumentationHandler := http.NewServeMux()
instrumentationHandler.Handle("/metrics", promhttp.Handler())
instrumentationHandler.HandleFunc("/debug/pprof/", pprof.Index)
instrumentationHandler.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline)
instrumentationHandler.HandleFunc("/debug/pprof/profile", pprof.Profile)
instrumentationHandler.HandleFunc("/debug/pprof/symbol", pprof.Symbol)
instrumentationHandler.HandleFunc("/debug/pprof/trace", pprof.Trace)
server := &http.Server{Addr: ":9666", Handler: instrumentationHandler}
wg.Add(1)
go func() {
defer wg.Done()
err := server.ListenAndServe()
if err != nil && err != http.ErrServerClosed {
log.FromContext(ctx).Error("Failed to start prometheus server", zap.Error(err))
log.FromContext(ctx).Error("Failed to start prometheus/pprof server", zap.Error(err))
cancelCtx(err)
}
}()
@@ -120,7 +133,7 @@ func main() {
defer cancel()
err := server.Shutdown(shutdownCtx)
if err != nil {
log.FromContext(ctx).Error("Failed to shutdown prometheus server", zap.Error(err))
log.FromContext(ctx).Error("Failed to shutdown prometheus/pprof server", zap.Error(err))
}
}()

View File

@@ -4,9 +4,11 @@ import (
"context"
"errors"
"sync"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/xvzf/computeblade-agent/pkg/fancontroller"
"github.com/xvzf/computeblade-agent/pkg/hal"
"github.com/xvzf/computeblade-agent/pkg/ledengine"
"github.com/xvzf/computeblade-agent/pkg/log"
@@ -60,7 +62,6 @@ func (e Event) String() string {
}
type ComputeBladeAgentConfig struct {
// IdleLedColor is the color of the edge LED when the blade is idle mode
IdleLedColor hal.LedColor
// IdentifyLedColor is the color of the edge LED when the blade is in identify mode
@@ -72,11 +73,11 @@ type ComputeBladeAgentConfig struct {
// StealthModeEnabled indicates whether stealth mode is enabled
StealthModeEnabled bool
// DefaultFanSpeed is the default fan speed in percent. Usually 40% is sufficient
DefaultFanSpeed uint
// Critical temperature of the compute blade (used to trigger critical mode)
CriticalTemperature uint
FanControllerConfig fancontroller.FanControllerConfig
FanUpdateInterval time.Duration
}
// ComputeBladeAgent implements the core-logic of the agent. It is responsible for handling events and interfacing with the hardware.
@@ -102,6 +103,8 @@ type computeBladeAgentImpl struct {
edgeLedEngine ledengine.LedEngine
topLedEngine ledengine.LedEngine
fanController fancontroller.FanController
eventChan chan Event
}
@@ -132,13 +135,22 @@ func NewComputeBladeAgent(opts ComputeBladeAgentConfig) (ComputeBladeAgent, erro
return nil, err
}
fanController, err := fancontroller.NewLinearFanController(opts.FanControllerConfig)
if err != nil {
return nil, err
}
return &computeBladeAgentImpl{
opts: opts,
blade: blade,
edgeLedEngine: edgeLedEngine,
topLedEngine: topLedEngine,
fanController: fanController,
state: NewComputeBladeState(),
eventChan: make(chan Event, 10), // backlog of 10 events. They should process fast but we e.g. don't want to miss button presses
eventChan: make(
chan Event,
10,
), // backlog of 10 events. They should process fast but we e.g. don't want to miss button presses
}, nil
}
@@ -153,9 +165,6 @@ func (a *computeBladeAgentImpl) Run(origCtx context.Context) error {
a.state.RegisterEvent(NoopEvent)
// Set defaults
if err := a.blade.SetFanSpeed(uint8(a.opts.DefaultFanSpeed)); err != nil {
return err
}
if err := a.blade.SetStealthMode(a.opts.StealthModeEnabled); err != nil {
return err
}
@@ -206,6 +215,18 @@ func (a *computeBladeAgentImpl) Run(origCtx context.Context) error {
}
}()
// Start fan controller
wg.Add(1)
go func() {
defer wg.Done()
log.FromContext(ctx).Info("Starting fan controller")
err := a.runFanController(ctx)
if err != nil && err != context.Canceled {
log.FromContext(ctx).Error("Fan Controller Failed", zap.Error(err))
cancelCtx(err)
}
}()
// Start event handler
wg.Add(1)
go func() {
@@ -295,7 +316,7 @@ func (a *computeBladeAgentImpl) handleCriticalActive(ctx context.Context) error
log.FromContext(ctx).Warn("Blade in critical state, setting fan speed to 100% and turning on LEDs")
// Set fan speed to 100%
setFanspeedError := a.blade.SetFanSpeed(100)
a.fanController.Override(&fancontroller.FanOverrideOpts{Speed: 100})
// Disable stealth mode (turn on LEDs)
setStealthModeError := a.blade.SetStealthMode(false)
@@ -305,15 +326,13 @@ func (a *computeBladeAgentImpl) handleCriticalActive(ctx context.Context) error
ledengine.NewSlowBlinkPattern(hal.LedColor{}, a.opts.CriticalLedColor),
)
// Combine errors, but don't stop execution flow for now
return errors.Join(setFanspeedError, setStealthModeError, setPatternTopLedErr)
return errors.Join(setStealthModeError, setPatternTopLedErr)
}
func (a *computeBladeAgentImpl) handleCriticalReset(ctx context.Context) error {
log.FromContext(ctx).Info("Critical state cleared, setting fan speed to default and restoring LEDs to default state")
// Set fan speed to 100%
if err := a.blade.SetFanSpeed(uint8(a.opts.DefaultFanSpeed)); err != nil {
return err
}
// Reset fan controller overrides
a.fanController.Override(nil)
// Reset stealth mode
if err := a.blade.SetStealthMode(a.opts.StealthModeEnabled); err != nil {
@@ -351,12 +370,41 @@ func (a *computeBladeAgentImpl) runEdgeLedEngine(ctx context.Context) error {
return a.edgeLedEngine.Run(ctx)
}
// runFanController periodically reads the blade temperature and applies the
// fan speed the fan controller derives from it.
//
// It blocks until ctx is done and then returns ctx.Err(). A non-positive
// FanUpdateInterval is rejected with an error before the loop starts. Failures
// to read the temperature or to set the fan speed are logged and do not stop
// the loop.
func (a *computeBladeAgentImpl) runFanController(ctx context.Context) error {
	// time.NewTicker panics on a non-positive interval; an unset (zero)
	// FanUpdateInterval must surface as an error instead of crashing the agent.
	if a.opts.FanUpdateInterval <= 0 {
		return errors.New("fan update interval must be greater than zero")
	}

	// Update fan speed periodically
	ticker := time.NewTicker(a.opts.FanUpdateInterval)
	defer ticker.Stop()

	for {
		// Wait for the next tick
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-ticker.C:
		}

		// Get temperature
		temp, err := a.blade.GetTemperature()
		if err != nil {
			log.FromContext(ctx).Error("Failed to get temperature", zap.Error(err))
			temp = 100 // set to a high value to trigger the maximum speed defined by the fan curve
		}

		// Derive fan speed from temperature (or from an active override)
		speed := a.fanController.GetFanSpeed(temp)

		// Set fan speed
		if err := a.blade.SetFanSpeed(speed); err != nil {
			log.FromContext(ctx).Error("Failed to set fan speed", zap.Error(err))
		}
	}
}
// EmitEvent dispatches an event to the event handler
func (a *computeBladeAgentImpl) EmitEvent(ctx context.Context, event Event) error {
select {
case a.eventChan <- event:
return nil
case <- ctx.Done():
case <-ctx.Done():
return ctx.Err()
}
}
@@ -366,7 +414,8 @@ func (a *computeBladeAgentImpl) SetFanSpeed(_ context.Context, speed uint8) erro
if a.state.CriticalActive() {
return errors.New("cannot set fan speed while the blade is in a critical state")
}
return a.blade.SetFanSpeed(speed)
a.fanController.Override(&fancontroller.FanOverrideOpts{Speed: speed})
return nil
}
// SetStealthMode enables/disables the stealth mode

View File

@@ -0,0 +1,88 @@
package fancontroller
import (
"fmt"
"sync"
)
// FanController derives a fan speed from a temperature and allows the derived
// value to be replaced by a fixed override.
type FanController interface {
	// Override pins the fan speed to opts.Speed. Passing nil removes the
	// override so the speed is derived from the temperature again.
	Override(opts *FanOverrideOpts)
	// GetFanSpeed returns the fan speed in percent for the given temperature.
	GetFanSpeed(temperature float64) uint8
}

// FanOverrideOpts describes a fixed fan speed that takes precedence over the
// temperature based calculation.
type FanOverrideOpts struct {
	// Speed is the fan speed in percent returned while the override is active
	Speed uint8
}

// FanControllerStep is a single point of the fan curve.
type FanControllerStep struct {
	// Temperature is the temperature to react to (same unit as the value
	// passed to GetFanSpeed)
	Temperature float64
	// Speed is the fan speed in percent
	Speed uint8
}

// FanControllerConfig configures a fan controller for the computeblade
type FanControllerConfig struct {
	// Steps defines the temperature/speed steps for the fan controller
	Steps []FanControllerStep
}

// fanControllerLinear is a simple fan controller that reacts to temperature
// changes with a linear function between two configured steps.
type fanControllerLinear struct {
	// mu guards overrideOpts
	mu           sync.Mutex
	overrideOpts *FanOverrideOpts
	config       FanControllerConfig
}

// NewLinearFanController creates a FanController that interpolates linearly
// between exactly two steps.
//
// An error is returned if the config does not contain exactly two steps, if
// the first step has a higher temperature or speed than the second one, if a
// step temperature is NaN, or if a speed exceeds 100 percent.
func NewLinearFanController(config FanControllerConfig) (FanController, error) {
	// Validate config for a very simple linear fan controller
	if len(config.Steps) != 2 {
		return nil, fmt.Errorf("exactly two steps must be defined")
	}
	lower, upper := config.Steps[0], config.Steps[1]

	// Written as a negated "<=" so that NaN temperatures (for which every
	// comparison is false) are rejected as well.
	if !(lower.Temperature <= upper.Temperature) {
		return nil, fmt.Errorf("step 1 temperature must be lower than step 2 temperature")
	}
	if lower.Speed > upper.Speed {
		return nil, fmt.Errorf("step 1 speed must be lower than step 2 speed")
	}
	if lower.Speed > 100 || upper.Speed > 100 {
		return nil, fmt.Errorf("speed must be between 0 and 100")
	}

	// Keep a private copy of the steps: the caller's slice shares its backing
	// array and could otherwise be modified after validation.
	config.Steps = append([]FanControllerStep(nil), config.Steps...)

	return &fanControllerLinear{
		config: config,
	}, nil
}

// Override sets (or, when opts is nil, clears) the fixed fan speed.
func (f *fanControllerLinear) Override(opts *FanOverrideOpts) {
	f.mu.Lock()
	defer f.mu.Unlock()

	if opts == nil {
		f.overrideOpts = nil
		return
	}
	// Store a copy so later changes to the caller's struct cannot alter the
	// override outside of the mutex.
	override := *opts
	f.overrideOpts = &override
}

// GetFanSpeed returns the fan speed in percent based on the current temperature.
//
// An active override always wins. Otherwise the speed of the first step is
// returned at or below its temperature, the speed of the second step at or
// above its temperature, and a linearly interpolated (truncated) value in
// between. A NaN temperature yields the speed of the second step.
func (f *fanControllerLinear) GetFanSpeed(temperature float64) uint8 {
	f.mu.Lock()
	defer f.mu.Unlock()

	if f.overrideOpts != nil {
		return f.overrideOpts.Speed
	}

	lower, upper := f.config.Steps[0], f.config.Steps[1]

	switch {
	case temperature <= lower.Temperature:
		return lower.Speed
	case temperature < upper.Temperature:
		// Calculate slope; upper.Speed >= lower.Speed is guaranteed by the
		// constructor, so the uint8 subtraction cannot wrap around.
		slope := float64(upper.Speed-lower.Speed) / (upper.Temperature - lower.Temperature)
		// Calculate speed
		speed := float64(lower.Speed) + slope*(temperature-lower.Temperature)
		return uint8(speed)
	default:
		// temperature >= upper.Temperature, or temperature is NaN. Falling
		// back to the highest configured speed is the safe choice for an
		// unusable reading.
		return upper.Speed
	}
}

View File

@@ -0,0 +1,147 @@
// fancontroller_test.go
package fancontroller_test
import (
"testing"
"github.com/xvzf/computeblade-agent/pkg/fancontroller"
)
// TestFanControllerLinear_GetFanSpeed checks the speed returned below, between
// and above the two configured steps.
func TestFanControllerLinear_GetFanSpeed(t *testing.T) {
	t.Parallel()

	fc, err := fancontroller.NewLinearFanController(fancontroller.FanControllerConfig{
		Steps: []fancontroller.FanControllerStep{
			{Temperature: 20, Speed: 30},
			{Temperature: 30, Speed: 60},
		},
	})
	if err != nil {
		t.Fatalf("Failed to create fan controller: %v", err)
	}

	type sample struct {
		temperature float64
		expected    uint8
	}
	samples := []sample{
		{temperature: 15, expected: 30}, // below the first step: minimum speed
		{temperature: 25, expected: 45}, // between the steps: linear interpolation
		{temperature: 35, expected: 60}, // above the second step: maximum speed
	}

	for _, s := range samples {
		s := s // capture a per-iteration copy for the parallel subtest
		t.Run("", func(t *testing.T) {
			t.Parallel()
			if got := fc.GetFanSpeed(s.temperature); got != s.expected {
				t.Errorf("For temperature %.2f, expected speed %d but got %d", s.temperature, s.expected, got)
			}
		})
	}
}
// TestFanControllerLinear_GetFanSpeedWithOverride checks that an active
// override is returned regardless of the temperature.
func TestFanControllerLinear_GetFanSpeedWithOverride(t *testing.T) {
	t.Parallel()

	curve := []fancontroller.FanControllerStep{
		{Temperature: 20, Speed: 30},
		{Temperature: 30, Speed: 60},
	}
	fc, err := fancontroller.NewLinearFanController(fancontroller.FanControllerConfig{Steps: curve})
	if err != nil {
		t.Fatalf("Failed to create fan controller: %v", err)
	}

	// Pin the speed; every temperature below must report this value.
	const overridden uint8 = 99
	fc.Override(&fancontroller.FanOverrideOpts{Speed: overridden})

	for _, temperature := range []float64{15, 25, 35} {
		temperature := temperature // capture a per-iteration copy for the parallel subtest
		t.Run("", func(t *testing.T) {
			t.Parallel()
			got := fc.GetFanSpeed(temperature)
			if got == overridden {
				return
			}
			t.Errorf("For temperature %.2f, expected speed %d but got %d", temperature, overridden, got)
		})
	}
}
// TestFanControllerLinear_ConstructionErrors checks that invalid configurations
// are rejected by NewLinearFanController with the expected error message.
func TestFanControllerLinear_ConstructionErrors(t *testing.T) {
	// Run alongside the other top-level tests of this package, which are all
	// marked parallel as well.
	t.Parallel()

	testCases := []struct {
		name   string
		config fancontroller.FanControllerConfig
		errMsg string
	}{
		{
			name: "InvalidStepCount",
			config: fancontroller.FanControllerConfig{
				Steps: []fancontroller.FanControllerStep{
					{Temperature: 20, Speed: 30},
				},
			},
			errMsg: "exactly two steps must be defined",
		},
		{
			name: "InvalidStepTemperatures",
			config: fancontroller.FanControllerConfig{
				Steps: []fancontroller.FanControllerStep{
					{Temperature: 30, Speed: 60},
					{Temperature: 20, Speed: 30},
				},
			},
			errMsg: "step 1 temperature must be lower than step 2 temperature",
		},
		{
			name: "InvalidStepSpeeds",
			config: fancontroller.FanControllerConfig{
				Steps: []fancontroller.FanControllerStep{
					{Temperature: 20, Speed: 60},
					{Temperature: 30, Speed: 30},
				},
			},
			errMsg: "step 1 speed must be lower than step 2 speed",
		},
		{
			name: "InvalidSpeedRange",
			config: fancontroller.FanControllerConfig{
				Steps: []fancontroller.FanControllerStep{
					{Temperature: 20, Speed: 10},
					{Temperature: 30, Speed: 200},
				},
			},
			errMsg: "speed must be between 0 and 100",
		},
	}

	for _, tc := range testCases {
		// Copy the fields used by the parallel subtest out of the loop variable.
		config := tc.config
		expectedErrMsg := tc.errMsg
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()
			_, err := fancontroller.NewLinearFanController(config)
			if err == nil {
				t.Errorf("Expected error with message '%s', but got no error", expectedErrMsg)
			} else if err.Error() != expectedErrMsg {
				t.Errorf("Expected error message '%s', but got '%s'", expectedErrMsg, err.Error())
			}
		})
	}
}

View File

@@ -22,6 +22,11 @@ var (
Name: "fan_speed",
Help: "Fan speed in RPM",
})
socTemperature = promauto.NewGauge(prometheus.GaugeOpts{
Namespace: "computeblade",
Name: "soc_temperature",
Help: "SoC temperature in °C",
})
computeModule = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: "computeblade",
Name: "compute_modul_present",
@@ -103,6 +108,8 @@ type ComputeBladeHal interface {
SetLed(idx uint, color LedColor) error
// GetPowerStatus returns the current power status of the blade
GetPowerStatus() (PowerStatus, error)
// GetTemperature returns the current temperature of the SoC in °C
GetTemperature() (float64, error)
// WaitForEdgeButtonPress blocks until the edge button is pressed; a non-nil error is returned if waiting fails (presumably including cancellation of ctx)
WaitForEdgeButtonPress(ctx context.Context) error
}

View File

@@ -6,7 +6,10 @@ import (
"context"
"errors"
"fmt"
"io"
"os"
"strconv"
"strings"
"sync"
"syscall"
"time"
@@ -47,6 +50,8 @@ const (
bcm283xRegPwmclkCntrlBitEnable = 4
bcm283xDebounceInterval = 100 * time.Millisecond
bcm283xThermalZonePath = "/sys/class/thermal/thermal_zone0/temp"
)
type bcm283x struct {
@@ -114,17 +119,17 @@ func NewCm4Hal(opts ComputeBladeHalOpts) (ComputeBladeHal, error) {
}
bcm := &bcm283x{
devmem: devmem,
gpioMem: gpioMem,
gpioMem8: gpioMem8,
pwmMem: pwmMem,
pwmMem8: pwmMem8,
clkMem: clkMem,
clkMem8: clkMem8,
gpioChip0: gpioChip0,
opts: opts,
devmem: devmem,
gpioMem: gpioMem,
gpioMem8: gpioMem8,
pwmMem: pwmMem,
pwmMem8: pwmMem8,
clkMem: clkMem,
clkMem8: clkMem8,
gpioChip0: gpioChip0,
opts: opts,
edgeButtonDebounceChan: make(chan struct{}, 1),
edgeButtonWatchChan: make(chan struct{}),
edgeButtonWatchChan: make(chan struct{}),
}
computeModule.WithLabelValues("cm4").Set(1)
@@ -181,7 +186,7 @@ func (bcm *bcm283x) handleEdgeButtonEdge(evt gpiod.LineEvent) {
case bcm.edgeButtonDebounceChan <- struct{}{}:
go func() {
// Manually debounce the button
defer <- bcm.edgeButtonDebounceChan
<-bcm.edgeButtonDebounceChan
time.Sleep(bcm283xDebounceInterval)
edgeButtonEventCount.Inc()
close(bcm.edgeButtonWatchChan)
@@ -440,3 +445,27 @@ func (bcm *bcm283x) updateLEDs() error {
return nil
}
// GetTemperature returns the current temperature of the SoC in °C.
//
// The raw value is read from bcm283xThermalZonePath, parsed as an integer and
// divided by 1000. On success the socTemperature gauge is updated as well. On
// failure -1 is returned together with the error.
func (bcm *bcm283x) GetTemperature() (float64, error) {
	// Read temperature
	f, err := os.Open(bcm283xThermalZonePath)
	if err != nil {
		return -1, err
	}
	// This method is polled periodically; without closing the handle every
	// call would leak a file descriptor.
	defer f.Close()

	raw, err := io.ReadAll(f)
	if err != nil {
		return -1, err
	}

	// Strip surrounding whitespace (e.g. a trailing newline) before parsing.
	cpuTemp, err := strconv.Atoi(strings.TrimSpace(string(raw)))
	if err != nil {
		return -1, err
	}

	temp := float64(cpuTemp) / 1000.0
	socTemperature.Set(temp)

	return temp, nil
}

View File

@@ -76,3 +76,8 @@ func (m *SimulatedHal) SetLed(idx uint, color LedColor) error {
m.logger.Info("SetLed", zap.Uint("idx", idx), zap.Any("color", color))
return nil
}
// GetTemperature logs the call and reports a fixed temperature of 42.
func (m *SimulatedHal) GetTemperature() (float64, error) {
	// Constant reading returned by the simulated hardware.
	const simulatedTemperature float64 = 42

	m.logger.Info("GetTemperature")

	return simulatedTemperature, nil
}

View File

@@ -48,3 +48,8 @@ func (m *ComputeBladeHalMock) SetLed(idx uint, color LedColor) error {
args := m.Called(idx, color)
return args.Error(0)
}
// GetTemperature records the call on the mock and returns the configured
// values: return value 0 as the float64 temperature and return value 1 as the
// error.
func (m *ComputeBladeHalMock) GetTemperature() (float64, error) {
	ret := m.Called()
	temperature := ret.Get(0).(float64)
	return temperature, ret.Error(1)
}