Mirror of https://github.com/compute-blade-community/compute-blade-agent.git, synced 2026-04-21 17:45:43 +02:00
feat(agent)!: add support for mTLS authentication in gRPC server (#54)
* refactor(fancontroller): improve fan controller validation logic and error handling for temperature steps
* refactor(agent): restructure gRPC server implementation by moving it to a new api package for better organization and maintainability
* feat(agent): implement gRPC server for managing compute blade agents and add graceful shutdown support
  refactor(agent): restructure agent code by moving API logic to a dedicated file and improving error handling
  fix(agent): update logging messages for clarity and consistency across the agent's operations
  chore(agent): remove unused API code and consolidate event handling logic for better maintainability
  style(agent): improve code formatting and organization for better readability and adherence to conventions
* feat(agent): add support for TLS configuration in gRPC server
* feat(api): add gRPC server authentication
* fix
* feat(config): add listen mode configuration to support tcp or unix sockets
  feat(agent): implement listen mode in gRPC service to allow flexible socket types
  feat(bladectl): enhance configuration loading and add support for TLS credentials
  fix(bladectl): improve error handling for gRPC connection and event emission
  style(logging): change log level from Warn to Info for better clarity in logs
* add logging middleware + fixes
* fix remote-connection to gRPC API Server
  debugging the SAN issues took the soul out of me... And then the stupid mistake in cmd_root where I didn't construct the TLS credentials correctly... Oh dear...
* cleanup
* cleanup
* cleanup commands
* cleanup
* make README.md nicer
* Update cmd/agent/main.go
  Co-authored-by: Matthias Riegler <github@m4tbit.de>
* Update cmd/bladectl/cmd_root.go
  Co-authored-by: Matthias Riegler <github@m4tbit.de>
* move bladectl config into correct directory
* fix bugs
* // FIXME: No dead code
* nit: code style
* nit(YAGNI): you aint gonna need it. Don't make life harder than it needs to be
* nit(YAGNI): you aint gonna need it. Don't make life harder than it needs to be
* nit(YAGNI): you aint gonna need it. Don't make life harder than it needs to be
* nit(cmd_identify)

---------

Co-authored-by: Matthias Riegler <github@m4tbit.de>
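The new api package that actually wires up the mTLS support is not part of the hunks shown below. As a rough, generic sketch of what mutual TLS on a grpc-go server looks like (certificate paths and the listen address are placeholders, not the agent's real configuration):

```go
package main

import (
    "crypto/tls"
    "crypto/x509"
    "log"
    "net"
    "os"

    "google.golang.org/grpc"
    "google.golang.org/grpc/credentials"
)

func main() {
    // Server certificate/key plus the CA used to verify clients.
    // All paths here are illustrative only.
    cert, err := tls.LoadX509KeyPair("/etc/compute-blade-agent/server.crt", "/etc/compute-blade-agent/server.key")
    if err != nil {
        log.Fatalf("load server keypair: %v", err)
    }
    caPEM, err := os.ReadFile("/etc/compute-blade-agent/ca.crt")
    if err != nil {
        log.Fatalf("read CA: %v", err)
    }
    pool := x509.NewCertPool()
    if !pool.AppendCertsFromPEM(caPEM) {
        log.Fatal("no CA certificates found")
    }

    // RequireAndVerifyClientCert is what turns plain TLS into mutual TLS:
    // every client must present a certificate signed by the CA above.
    creds := credentials.NewTLS(&tls.Config{
        Certificates: []tls.Certificate{cert},
        ClientAuth:   tls.RequireAndVerifyClientCert,
        ClientCAs:    pool,
        MinVersion:   tls.VersionTLS12,
    })

    srv := grpc.NewServer(grpc.Creds(creds))
    lis, err := net.Listen("tcp", ":8081")
    if err != nil {
        log.Fatalf("listen: %v", err)
    }
    if err := srv.Serve(lis); err != nil {
        log.Fatalf("serve: %v", err)
    }
}
```

The difference from one-way TLS is the combination of ClientAuth and ClientCAs in the server's tls.Config; without them the server would accept any client.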
@@ -1,16 +1,15 @@
# Default configuration for the compute-blade-agent

log:
  mode: production # production, development

# Listen configuration
listen:
  metrics: ":9666"
  grpc: /tmp/compute-blade-agent.sock
  authenticated: false
  mode: unix # tcp or unix

# Hardware abstraction layer configuration
hal:
  # For the default fan unit, fanspeed measurement is causing a tiny bit of CPU laod.
  # For the default fan unit, fanspeed measurement is causing a tiny bit of CPU load.
  # Sometimes it might not be desired
  rpm_reporting_standard_fan_unit: true
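The new listen.mode key above decides whether the gRPC endpoint is a unix socket or a TCP port; the actual wiring happens in the new api package (via api.WithListenMode further down in the main.go diff). A minimal, hypothetical sketch of the underlying pattern:

```go
package main

import (
    "fmt"
    "log"
    "net"
)

// newListener is a hypothetical helper mirroring what listen.mode implies:
// "unix" treats addr as a socket path, "tcp" treats it as host:port.
func newListener(mode, addr string) (net.Listener, error) {
    switch mode {
    case "unix", "tcp":
        return net.Listen(mode, addr)
    default:
        return nil, fmt.Errorf("unsupported listen mode %q", mode)
    }
}

func main() {
    lis, err := newListener("unix", "/tmp/compute-blade-agent.sock")
    if err != nil {
        log.Fatalf("listen: %v", err)
    }
    defer lis.Close()
    log.Printf("listening on %s", lis.Addr())
}
```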
@@ -35,14 +34,13 @@ criticalLedColor:
# Enable/disable stealth mode; turns off all LEDs on the blade
stealth_mode: false

# Simple fan-speed controls based on the SoC temperature
fan_controller:
  # For now, this is only supporting a two-step configuration.
  steps:
    - temperature: 45
      percent: 40
    - temperature: 55
      percent: 80

# Critical temperature threshold
critical_temperature_threshold: 60
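The fan-controller validation mentioned in the commit message ("improve fan controller validation logic and error handling for temperature steps") lives elsewhere in the PR. This is a speculative sketch of what validating a step list like the one above could look like; the struct and function names are made up for illustration:

```go
package main

import (
    "fmt"
    "log"
)

// FanStep mirrors one entry under fan_controller.steps; the field names
// are assumptions, not the agent's actual config struct.
type FanStep struct {
    Temperature int
    Percent     int
}

// validateSteps rejects empty step lists, out-of-range duty cycles, and steps
// that are not ordered by increasing temperature / non-decreasing percent.
func validateSteps(steps []FanStep) error {
    if len(steps) == 0 {
        return fmt.Errorf("at least one fan step is required")
    }
    for i, s := range steps {
        if s.Percent < 0 || s.Percent > 100 {
            return fmt.Errorf("step %d: percent %d out of range 0-100", i, s.Percent)
        }
        if i > 0 && (s.Temperature <= steps[i-1].Temperature || s.Percent < steps[i-1].Percent) {
            return fmt.Errorf("step %d: temperature must increase and percent must not decrease", i)
        }
    }
    return nil
}

func main() {
    steps := []FanStep{{Temperature: 45, Percent: 40}, {Temperature: 55, Percent: 80}}
    if err := validateSteps(steps); err != nil {
        log.Fatal(err)
    }
    fmt.Println("fan steps ok")
}
```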
@@ -2,8 +2,8 @@ package main

import (
    "context"
    "errors"
    "fmt"
    "net"
    "net/http"
    "net/http/pprof"
    "os"
@@ -14,12 +14,12 @@ import (
    "time"

    "github.com/prometheus/client_golang/prometheus/promhttp"
    "github.com/spf13/pflag"
    "github.com/spf13/viper"
    bladeapiv1alpha1 "github.com/uptime-induestries/compute-blade-agent/api/bladeapi/v1alpha1"
    "github.com/uptime-induestries/compute-blade-agent/internal/agent"
    "github.com/uptime-induestries/compute-blade-agent/pkg/log"
    "github.com/uptime-industries/compute-blade-agent/internal/agent"
    "github.com/uptime-industries/compute-blade-agent/internal/api"
    "github.com/uptime-industries/compute-blade-agent/pkg/log"
    "go.uber.org/zap"
    "google.golang.org/grpc"
)

var (
@@ -28,13 +28,12 @@ var (
    Date string
)

var debug = pflag.BoolP("debug", "v", false, "enable verbose logging")

func main() {
    var wg sync.WaitGroup
    pflag.Parse()

    // Setup configuration
    viper.SetConfigType("yaml")

    // auto-bind environment variables
    viper.SetEnvPrefix("BLADE")
    viper.SetEnvKeyReplacer(strings.NewReplacer(".", "_"))
    viper.AutomaticEnv()
@@ -49,13 +48,11 @@ func main() {

    // setup logger
    var baseLogger *zap.Logger
    switch logMode := viper.GetString("log.mode"); logMode {
    case "development":

    if debug != nil && *debug {
        baseLogger = zap.Must(zap.NewDevelopment())
    case "production":
    } else {
        baseLogger = zap.Must(zap.NewProduction())
    default:
        panic(fmt.Errorf("invalid log.mode: %s", logMode))
    }

    zapLogger := baseLogger.With(zap.String("app", "compute-blade-agent"))
@@ -71,73 +68,96 @@ func main() {
    // load configuration
    var cbAgentConfig agent.ComputeBladeAgentConfig
    if err := viper.Unmarshal(&cbAgentConfig); err != nil {
        log.FromContext(ctx).Error("Failed to load configuration", zap.Error(err))
        cancelCtx(err)
        log.FromContext(ctx).Fatal("Failed to load configuration", zap.Error(err))
    }

    // setup stop signal handlers
    sigs := make(chan os.Signal, 1)
    signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
    wg.Add(1)
    signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT)
    go func() {
        defer wg.Done()
        // Wait for context cancel or signal
        select {
        // Wait for context cancel
        case <-ctx.Done():

        // Wait for signal
        case sig := <-sigs:
            // On signal, cancel context
            cancelCtx(fmt.Errorf("signal %s received", sig))
            switch sig {
            case syscall.SIGTERM:
                fallthrough
            case syscall.SIGINT:
                fallthrough
            case syscall.SIGQUIT:
                // On terminate signal, cancel context causing the program to terminate
                cancelCtx(fmt.Errorf("signal %s received", sig))

            default:
                log.FromContext(ctx).Warn("Received unknown signal", zap.String("signal", sig.String()))
            }
        }
    }()

    log.FromContext(ctx).Info("Bootstrapping compute-blade-agent", zap.String("version", Version), zap.String("commit", Commit), zap.String("date", Date))
    computebladeAgent, err := agent.NewComputeBladeAgent(ctx, cbAgentConfig)
    if err != nil {
        log.FromContext(ctx).Error("Failed to create agent", zap.Error(err))
        cancelCtx(err)
        os.Exit(1)
        log.FromContext(ctx).Fatal("Failed to create agent", zap.Error(err))
    }

    // Run agent
    wg.Add(1)
    go func() {
        defer wg.Done()
        log.FromContext(ctx).Info("Starting agent")
        err := computebladeAgent.Run(ctx)
        if err != nil && err != context.Canceled {
            log.FromContext(ctx).Error("Failed to run agent", zap.Error(err))
            cancelCtx(err)
        }
    }()
    computebladeAgent.RunAsync(ctx, cancelCtx)

    // Setup GRPC server
    // FIXME add logging middleware
    grpcServer := grpc.NewServer()
    bladeapiv1alpha1.RegisterBladeAgentServiceServer(grpcServer, agent.NewGrpcServiceFor(computebladeAgent))
    grpcServer := api.NewGrpcApiServer(ctx,
        api.WithComputeBladeAgent(computebladeAgent),
        api.WithAuthentication(cbAgentConfig.Listen.GrpcAuthenticated),
        api.WithListenAddr(cbAgentConfig.Listen.Grpc),
        api.WithListenMode(cbAgentConfig.Listen.GrpcListenMode),
    )

    // Run gRPC API
    grpcServer.ServeAsync(ctx, cancelCtx)

    // setup prometheus endpoint
    promServer := runPrometheusEndpoint(ctx, cancelCtx, &cbAgentConfig.Listen)

    // Wait for done
    <-ctx.Done()

    var wg sync.WaitGroup

    // Shut-Down GRPC Server
    wg.Add(1)
    go func() {
        defer wg.Done()
        grpcListen, err := net.Listen("unix", viper.GetString("listen.grpc"))
        if err != nil {
            log.FromContext(ctx).Error("Failed to create grpc listener", zap.Error(err))
            cancelCtx(err)
            return
        }
        log.FromContext(ctx).Info("Starting grpc server", zap.String("address", viper.GetString("listen.grpc")))
        if err := grpcServer.Serve(grpcListen); err != nil && err != grpc.ErrServerStopped {
            log.FromContext(ctx).Error("Failed to start grpc server", zap.Error(err))
            cancelCtx(err)
        }
    }()
    wg.Add(1)
    go func() {
        defer wg.Done()
        <-ctx.Done()
        log.FromContext(ctx).Info("Shutting down grpc server")
        grpcServer.GracefulStop()
    }()

    // setup prometheus endpoint
    // Shut-Down Prometheus Endpoint
    wg.Add(1)
    go func() {
        defer wg.Done()

        shutdownCtx, shutdownCtxCancel := context.WithTimeout(context.Background(), 5*time.Second)
        defer shutdownCtxCancel()

        if err := promServer.Shutdown(shutdownCtx); err != nil {
            log.FromContext(ctx).Error("Failed to shutdown prometheus/pprof server", zap.Error(err))
        }
    }()

    wg.Wait()

    // Wait for context cancel
    if err := ctx.Err(); !errors.Is(err, context.Canceled) {
        log.FromContext(ctx).Fatal("Exiting", zap.Error(err))
    } else {
        log.FromContext(ctx).Info("Exiting")
    }
}

func runPrometheusEndpoint(ctx context.Context, cancel context.CancelCauseFunc, apiConfig *api.Config) *http.Server {
    instrumentationHandler := http.NewServeMux()
    instrumentationHandler.Handle("/metrics", promhttp.Handler())
    instrumentationHandler.HandleFunc("/debug/pprof/", pprof.Index)
@@ -145,33 +165,17 @@ func main() {
    instrumentationHandler.HandleFunc("/debug/pprof/profile", pprof.Profile)
    instrumentationHandler.HandleFunc("/debug/pprof/symbol", pprof.Symbol)
    instrumentationHandler.HandleFunc("/debug/pprof/trace", pprof.Trace)
    server := &http.Server{Addr: ":9666", Handler: instrumentationHandler}
    wg.Add(1)

    server := &http.Server{Addr: apiConfig.Metrics, Handler: instrumentationHandler}

    // Run Prometheus Endpoint
    go func() {
        defer wg.Done()
        err := server.ListenAndServe()
        if err != nil && err != http.ErrServerClosed {
        if err != nil && !errors.Is(err, http.ErrServerClosed) {
            log.FromContext(ctx).Error("Failed to start prometheus/pprof server", zap.Error(err))
            cancelCtx(err)
        }
    }()
    wg.Add(1)
    go func() {
        defer wg.Done()
        <-ctx.Done()
        shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
        defer cancel()
        err := server.Shutdown(shutdownCtx)
        if err != nil {
            log.FromContext(ctx).Error("Failed to shutdown prometheus/pprof server", zap.Error(err))
            cancel(err)
        }
    }()

    // Wait for context cancel
    wg.Wait()
    if err := ctx.Err(); err != nil && err != context.Canceled {
        log.FromContext(ctx).Fatal("Exiting", zap.Error(err))
    } else {
        log.FromContext(ctx).Info("Exiting")
    }
    return server
}
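On the bladectl side, the commit message notes that the TLS credentials in cmd_root were initially constructed incorrectly and that certificate SAN mismatches were hard to debug. A minimal sketch of a mutual-TLS gRPC client dial, with those pitfalls called out in comments (paths, address and ServerName are placeholders, not bladectl's actual configuration):

```go
package main

import (
    "crypto/tls"
    "crypto/x509"
    "log"
    "os"

    "google.golang.org/grpc"
    "google.golang.org/grpc/credentials"
)

func main() {
    // Client certificate/key presented to the server, plus the CA that
    // signed the server certificate. Paths are illustrative only.
    cert, err := tls.LoadX509KeyPair("client.crt", "client.key")
    if err != nil {
        log.Fatalf("load client keypair: %v", err)
    }
    caPEM, err := os.ReadFile("ca.crt")
    if err != nil {
        log.Fatalf("read CA: %v", err)
    }
    pool := x509.NewCertPool()
    if !pool.AppendCertsFromPEM(caPEM) {
        log.Fatal("no CA certificates found")
    }

    creds := credentials.NewTLS(&tls.Config{
        Certificates: []tls.Certificate{cert},
        RootCAs:      pool,
        // ServerName must match a SAN in the server certificate;
        // a mismatch here is the classic source of handshake failures.
        ServerName: "blade-01.example.com",
        MinVersion: tls.VersionTLS12,
    })

    conn, err := grpc.NewClient("blade-01.example.com:8081", grpc.WithTransportCredentials(creds))
    if err != nil {
        log.Fatalf("dial: %v", err)
    }
    defer conn.Close()
    // conn can now back a generated gRPC client for the blade agent API.
}
```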