feat(agent)!: add support for mTLS authentication in gRPC server (#54)

* refactor(fancontroller): improve fan controller validation logic and error handling for temperature steps

* refactor(agent): restructure gRPC server implementation by moving it to a new api package for better organization and maintainability

* feat(agent): implement gRPC server for managing compute blade agents and add graceful shutdown support
refactor(agent): restructure agent code by moving API logic to a dedicated file and improving error handling
fix(agent): update logging messages for clarity and consistency across the agent's operations
chore(agent): remove unused API code and consolidate event handling logic for better maintainability
style(agent): improve code formatting and organization for better readability and adherence to conventions

* feat(agent): add support for TLS configuration in gRPC server

* feat(api): add gRPC server authentication

* fix

* feat(config): add listen mode configuration to support tcp or unix sockets
feat(agent): implement listen mode in gRPC service to allow flexible socket types
feat(bladectl): enhance configuration loading and add support for TLS credentials
fix(bladectl): improve error handling for gRPC connection and event emission
style(logging): change log level from Warn to Info for better clarity in logs

* add logging middleware + fixes

* fix remote-connection to gRPC API Server

debugging the SAN issues took the soul out of me... And then the stupid
mistake in cmd_root where I didn't construct the TLS credentials
correctly... Oh dear...

* cleanup

* cleanup

* cleanup commands

* cleanup

* make README.md nicer

* Update cmd/agent/main.go

Co-authored-by: Matthias Riegler <github@m4tbit.de>

* Update cmd/bladectl/cmd_root.go

Co-authored-by: Matthias Riegler <github@m4tbit.de>

* move bladectl config into correct directory

* fix bugs

* // FIXME: No dead code

* nit: code style

* nit(YAGNI): you ain't gonna need it. Don't make life harder than it needs to be

* nit(YAGNI): you ain't gonna need it. Don't make life harder than it needs to be

* nit(YAGNI): you ain't gonna need it. Don't make life harder than it needs to be

* nit(cmd_identify)

---------

Co-authored-by: Matthias Riegler <github@m4tbit.de>
This commit is contained in:
Cedric Kienzler
2025-05-12 00:00:55 +02:00
committed by GitHub
parent ec6229ad86
commit 70541d86ba
60 changed files with 2189 additions and 650 deletions

View File

@@ -2,7 +2,7 @@ package main
import (
"github.com/spf13/cobra"
bladeapiv1alpha1 "github.com/uptime-induestries/compute-blade-agent/api/bladeapi/v1alpha1"
bladeapiv1alpha1 "github.com/uptime-industries/compute-blade-agent/api/bladeapi/v1alpha1"
)
var (

View File

@@ -1,9 +1,11 @@
package main
import (
"fmt"
"github.com/sierrasoftworks/humane-errors-go"
"github.com/spf13/cobra"
bladeapiv1alpha1 "github.com/uptime-induestries/compute-blade-agent/api/bladeapi/v1alpha1"
bladeapiv1alpha1 "github.com/uptime-industries/compute-blade-agent/api/bladeapi/v1alpha1"
"google.golang.org/protobuf/types/known/emptypb"
)
@@ -13,21 +15,27 @@ var (
)
func init() {
cmdIdentify.Flags().BoolVarP(&confirm, "confirm", "c", false, "confirm the identify state")
cmdIdentify.Flags().BoolVarP(&wait, "wait", "w", false, "Wait for the identify state to be confirmed (e.g. by a physical button press)")
cmdSet.AddCommand(cmdIdentify)
cmdSetIdentify.Flags().BoolVarP(&confirm, "confirm", "c", false, "confirm the identify state")
cmdSetIdentify.Flags().BoolVarP(&wait, "wait", "w", false, "Wait for the identify state to be confirmed (e.g. by a physical button press)")
cmdSet.AddCommand(cmdSetIdentify)
cmdRemove.AddCommand(cmdRmIdentify)
}
var cmdIdentify = &cobra.Command{
var cmdSetIdentify = &cobra.Command{
Use: "identify",
Example: "bladectl set identify --wait",
Short: "interact with the compute-blade identity LED",
RunE: runIdentity,
RunE: runSetIdentify,
}
func runIdentity(cmd *cobra.Command, _ []string) error {
var err error
// cmdRmIdentify is the `identify` subcommand that init registers under
// cmdRemove. Running it invokes runRemoveIdentify. The example uses `unset`,
// which is one of cmdRemove's aliases.
var cmdRmIdentify = &cobra.Command{
Use: "identify",
Example: "bladectl unset identify",
Short: "remove the identify state with the compute-blade identity LED",
RunE: runRemoveIdentify,
}
func runSetIdentify(cmd *cobra.Command, _ []string) error {
ctx := cmd.Context()
client := clientFromContext(ctx)
@@ -38,17 +46,41 @@ func runIdentity(cmd *cobra.Command, _ []string) error {
}
// Emit the event to the compute-blade-agent
_, err = client.EmitEvent(ctx, &bladeapiv1alpha1.EmitEventRequest{Event: event})
_, err := client.EmitEvent(ctx, &bladeapiv1alpha1.EmitEventRequest{Event: event})
if err != nil {
return humane.Wrap(err, "failed to emit event", "ensure the compute-blade agent is running and responsive to requests", "check the compute-blade agent logs for more information using 'journalctl -u compute-blade-agent.service'")
return fmt.Errorf(humane.Wrap(err,
"failed to emit event",
"ensure the compute-blade agent is running and responsive to requests",
"check the compute-blade agent logs for more information using 'journalctl -u compute-blade-agent.service'",
).Display(),
)
}
// Check if we should wait for the identify state to be confirmed
if wait {
_, err := client.WaitForIdentifyConfirm(ctx, &emptypb.Empty{})
if err != nil {
return humane.Wrap(err, "unable to wait for confirmation", "ensure the compute-blade agent is running and responsive to requests", "check the compute-blade agent logs for more information using 'journalctl -u compute-blade-agent.service'")
}
if !wait {
return nil
}
if _, err := client.WaitForIdentifyConfirm(ctx, &emptypb.Empty{}); err != nil {
return humane.Wrap(err, "unable to wait for confirmation", "ensure the compute-blade agent is running and responsive to requests", "check the compute-blade agent logs for more information using 'journalctl -u compute-blade-agent.service'")
}
return nil
}
// runRemoveIdentify is the RunE handler of cmdRmIdentify. It takes the gRPC
// client stored in the command context and emits an IDENTIFY_CONFIRM event to
// the compute-blade-agent. It returns nil on success, or an error whose text is
// the humane-error display output when the event could not be emitted.
func runRemoveIdentify(cmd *cobra.Command, _ []string) error {
ctx := cmd.Context()
client := clientFromContext(ctx)
// Emit the IDENTIFY_CONFIRM event to the compute-blade-agent
_, err := client.EmitEvent(ctx, &bladeapiv1alpha1.EmitEventRequest{Event: bladeapiv1alpha1.Event_IDENTIFY_CONFIRM})
if err != nil {
// NOTE(review): the Display() output is passed to fmt.Errorf as the format
// string, so any '%' in the wrapped error text would be interpreted as a
// formatting verb. Consider fmt.Errorf("%s", ...) or errors.New(...).
return fmt.Errorf(humane.Wrap(err,
"failed to emit event",
"ensure the compute-blade agent is running and responsive to requests",
"check the compute-blade agent logs for more information using 'journalctl -u compute-blade-agent.service'",
).Display(),
)
}
return nil

View File

@@ -2,45 +2,161 @@ package main
import (
"context"
humane "github.com/sierrasoftworks/humane-errors-go"
"github.com/spf13/cobra"
bladeapiv1alpha1 "github.com/uptime-induestries/compute-blade-agent/api/bladeapi/v1alpha1"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
"crypto/tls"
"crypto/x509"
"encoding/base64"
"fmt"
"net"
"os"
"os/signal"
"strings"
"syscall"
"time"
"github.com/sierrasoftworks/humane-errors-go"
"github.com/spf13/cobra"
"github.com/spf13/viper"
bladeapiv1alpha1 "github.com/uptime-industries/compute-blade-agent/api/bladeapi/v1alpha1"
"github.com/uptime-industries/compute-blade-agent/cmd/bladectl/config"
"github.com/uptime-industries/compute-blade-agent/pkg/log"
"go.uber.org/zap"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials"
"google.golang.org/grpc/credentials/insecure"
)
// Values bound to bladectl's persistent command-line flags.
var (
// bladeName is bound to the --blade flag. It is passed to FindBlade to pick
// the blade to talk to; its default is the empty string.
bladeName string
// timeout is bound to the --timeout flag and is used as the deadline of the
// command context created in rootCmd's PersistentPreRunE.
timeout time.Duration
)
// init registers the persistent flags that rootCmd exposes.
func init() {
	flags := rootCmd.PersistentFlags()

	// The config key is wrapped in single quotes on purpose: pflag treats a
	// back-quoted word in a usage string as the name of the flag's value
	// placeholder, which would render the help as `--blade current-blade` and
	// strip the quotes from the sentence.
	flags.StringVar(&bladeName, "blade", "", "Name of the compute-blade to control. If not provided, the compute-blade specified in 'current-blade' will be used.")
	flags.DurationVar(&timeout, "timeout", time.Minute, "timeout for gRPC requests")
}
var rootCmd = &cobra.Command{
Use: "bladectl",
Short: "bladectl interacts with the compute-blade-agent and allows you to manage hardware-features of your compute blade(s)",
PersistentPreRunE: func(cmd *cobra.Command, _ []string) error {
origCtx := cmd.Context()
// Load potential file configs
if err := viper.ReadInConfig(); err != nil {
return err
}
// load configuration
var bladectlCfg config.BladectlConfig
if err := viper.Unmarshal(&bladectlCfg); err != nil {
return err
}
var blade *config.Blade
blade, herr := bladectlCfg.FindBlade(bladeName)
if herr != nil {
return fmt.Errorf(herr.Display())
}
// setup signal handlers for SIGINT and SIGTERM
ctx, cancelCtx := context.WithTimeout(origCtx, timeout)
// setup signal handler channels
sigs := make(chan os.Signal, 1)
signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT)
go func() {
// Wait for context cancel or signal
select {
// Wait for context cancel
case <-ctx.Done():
case <-sigs:
// On signal, cancel context
cancelCtx()
// Wait for signal
case sig := <-sigs:
switch sig {
case syscall.SIGTERM:
fallthrough
case syscall.SIGINT:
fallthrough
case syscall.SIGQUIT:
// On terminate signal, cancel context causing the program to terminate
cancelCtx()
default:
log.FromContext(ctx).Warn("Received unknown signal", zap.String("signal", sig.String()))
}
}
}()
conn, err := grpc.Dial(grpcAddr, grpc.WithTransportCredentials(insecure.NewCredentials()))
if err != nil {
return humane.Wrap(err, "failed to dial grpc server", "ensure the gRPC server you are trying to connect to is running and the address is correct")
}
client := bladeapiv1alpha1.NewBladeAgentServiceClient(conn)
// Create our gRPC Transport Credentials
credentials := insecure.NewCredentials()
certData := blade.Certificate
// If we're presented with certificate data in the config, we try to create a mTLS connection
if len(certData.ClientCertificateData) > 0 && len(certData.ClientKeyData) > 0 && len(certData.CertificateAuthorityData) > 0 {
var err error
serverName := blade.Server
if strings.Contains(serverName, ":") {
if serverName, _, err = net.SplitHostPort(blade.Server); err != nil {
return fmt.Errorf("failed to parse server address: %w", err)
}
}
if credentials, err = loadTlsCredentials(serverName, certData); err != nil {
return err
}
}
conn, err := grpc.NewClient(blade.Server, grpc.WithTransportCredentials(credentials))
if err != nil {
return fmt.Errorf(
humane.Wrap(err,
"failed to dial grpc server",
"ensure the gRPC server you are trying to connect to is running and the address is correct",
).Display(),
)
}
client := bladeapiv1alpha1.NewBladeAgentServiceClient(conn)
cmd.SetContext(clientIntoContext(ctx, client))
return nil
},
}
// loadTlsCredentials builds gRPC transport credentials for a mutually
// authenticated TLS connection.
//
// The three fields of certData are base64 (standard alphabet) strings. After
// decoding, the client certificate and key are parsed as a PEM key pair and the
// CA data is added to a fresh certificate pool that becomes the set of trusted
// roots. server is used as the expected TLS server name.
//
// An error is returned when any field is not valid base64, when the key pair
// cannot be parsed, or when no CA certificate could be added to the pool.
func loadTlsCredentials(server string, certData config.Certificate) (credentials.TransportCredentials, error) {
	b64 := base64.StdEncoding

	// Decode in the order client cert, client key, CA so the first invalid
	// field is the one reported.
	clientCert, err := b64.DecodeString(certData.ClientCertificateData)
	if err != nil {
		return nil, fmt.Errorf("invalid base64 client cert: %w", err)
	}

	clientKey, err := b64.DecodeString(certData.ClientKeyData)
	if err != nil {
		return nil, fmt.Errorf("invalid base64 client key: %w", err)
	}

	rootPEM, err := b64.DecodeString(certData.CertificateAuthorityData)
	if err != nil {
		return nil, fmt.Errorf("invalid base64 CA cert: %w", err)
	}

	// Client identity presented to the server.
	keyPair, err := tls.X509KeyPair(clientCert, clientKey)
	if err != nil {
		return nil, fmt.Errorf("failed to parse client cert/key pair: %w", err)
	}

	// Trust anchors used to verify the server certificate.
	roots := x509.NewCertPool()
	if ok := roots.AppendCertsFromPEM(rootPEM); !ok {
		return nil, fmt.Errorf("failed to append CA certificate")
	}

	return credentials.NewTLS(&tls.Config{
		Certificates: []tls.Certificate{keyPair},
		RootCAs:      roots,
		ServerName:   server,
	}), nil
}

View File

@@ -21,4 +21,11 @@ var (
Short: "Configure compute-blade",
Long: "These commands allow you make changes to compute-blade related information.",
}
// cmdRemove is the parent command for subcommands that remove compute-blade
// related state. It is also reachable as rm, delete, del and unset.
cmdRemove = &cobra.Command{
	Use:     "remove",
	Aliases: []string{"rm", "delete", "del", "unset"},
	Short:   "Remove compute-blade state",
	Long:    "These commands allow you to remove compute-blade related state.",
}
)

View File

@@ -2,6 +2,7 @@ package main
import (
"fmt"
"github.com/spf13/cobra"
)

View File

@@ -0,0 +1,94 @@
package config
import (
"encoding/base64"
"os"
"path/filepath"
"github.com/sierrasoftworks/humane-errors-go"
"go.uber.org/zap"
)
// BladectlConfig is the top-level bladectl configuration: the known blades and
// the name of the blade that is used when none is requested explicitly.
type BladectlConfig struct {
// Blades lists the blades this configuration knows about.
Blades []NamedBlade `yaml:"blades" mapstructure:"blades"`
// CurrentBlade is the blade name FindBlade falls back to when it is called
// with an empty name.
CurrentBlade string `yaml:"current-blade" mapstructure:"current-blade"`
}
// NamedBlade associates a blade's connection settings with the name it is
// looked up by.
type NamedBlade struct {
// Name is the key FindBlade compares against.
Name string `yaml:"name" mapstructure:"name"`
// Blade holds the connection settings for this entry.
Blade Blade `yaml:"blade" mapstructure:"blade"`
}
// Blade describes how to reach a single compute-blade-agent.
type Blade struct {
// Server is the address bladectl hands to the gRPC client as its target.
Server string `yaml:"server" mapstructure:"server"`
// Certificate optionally carries the mTLS material for the connection.
// bladectl only builds TLS credentials when all three of its fields are
// non-empty; otherwise it connects with insecure credentials.
Certificate Certificate `yaml:"cert,omitempty" mapstructure:"cert,omitempty"`
}
// Certificate holds the TLS material of a blade as base64 (standard alphabet)
// strings. bladectl decodes each field and parses the result as PEM.
type Certificate struct {
// CertificateAuthorityData is the CA certificate used as trust root.
CertificateAuthorityData string `yaml:"certificate-authority-data,omitempty" mapstructure:"certificate-authority-data,omitempty"`
// ClientCertificateData is the client certificate presented to the server.
ClientCertificateData string `yaml:"client-certificate-data,omitempty" mapstructure:"client-certificate-data,omitempty"`
// ClientKeyData is the private key belonging to the client certificate.
ClientKeyData string `yaml:"client-key-data,omitempty" mapstructure:"client-key-data,omitempty"`
}
// FindBlade returns the connection settings of the blade registered under name.
//
// An empty name selects the blade named by c.CurrentBlade. The returned pointer
// refers to a copy of the matching entry, so changing it does not modify c.
// When no entry matches, the result is nil together with a humane.Error that
// carries remediation advice.
func (c *BladectlConfig) FindBlade(name string) (*Blade, humane.Error) {
	if name == "" {
		name = c.CurrentBlade
	}

	for _, blade := range c.Blades {
		if blade.Name == name {
			return &blade.Blade, nil
		}
	}

	// The advice must name the flag bladectl actually registers (--blade).
	return nil, humane.New("current blade not found in configuration",
		"ensure you have a current-blade set in your configuration file, or use the --blade flag to specify one",
		"make sure you have a blade with the name you specified in the blades configuration",
	)
}
// NewAuthenticatedBladectlConfig returns the single-blade configuration created
// by NewBladectlConfig for server, with the given CA, client certificate and
// client key stored on that blade as base64 (standard alphabet) strings.
//
// NOTE(review): the parameter names suggest DER input for the client
// certificate and key, while bladectl parses the decoded data with
// tls.X509KeyPair, which expects PEM. Confirm what callers pass.
func NewAuthenticatedBladectlConfig(server string, caPEM []byte, clientCertDER []byte, clientKeyDER []byte) *BladectlConfig {
	encode := base64.StdEncoding.EncodeToString

	cfg := NewBladectlConfig(server)

	// NewBladectlConfig always yields exactly one blade; fill in its certificate.
	cert := &cfg.Blades[0].Blade.Certificate
	cert.CertificateAuthorityData = encode(caPEM)
	cert.ClientCertificateData = encode(clientCertDER)
	cert.ClientKeyData = encode(clientKeyDER)

	return cfg
}
// NewBladectlConfig returns a configuration containing one blade that points at
// server. The blade is named after the local hostname and is also selected as
// the current blade.
//
// If the hostname cannot be determined, the failure is logged at Fatal level
// through the global zap logger.
func NewBladectlConfig(server string) *BladectlConfig {
	hostname, err := os.Hostname()
	if err != nil {
		zap.L().Fatal("Failed to extract hostname", zap.Error(err))
	}

	local := NamedBlade{
		Name:  hostname,
		Blade: Blade{Server: server},
	}

	return &BladectlConfig{
		Blades:       []NamedBlade{local},
		CurrentBlade: hostname,
	}
}
// EnsureBladectlConfigHome makes sure the bladectl configuration directory
// (<home>/.config/bladectl) exists, creating it and any missing parents with
// mode 0700, and returns its path.
//
// A humane.Error is returned when the user's home directory cannot be
// determined or the directory cannot be created.
func EnsureBladectlConfigHome() (string, humane.Error) {
	home, err := os.UserHomeDir()
	if err != nil {
		return "", humane.Wrap(err, "Failed to extract home directory",
			"this should never happen",
			"please report this as a bug to https://github.com/uptime-industries/compute-blade-agent/issues",
		)
	}

	dir := filepath.Join(home, ".config", "bladectl")

	if mkErr := os.MkdirAll(dir, 0700); mkErr != nil {
		return "", humane.Wrap(mkErr, "Failed to create config directory",
			"ensure the home-directory is writable by the agent user",
		)
	}

	return dir, nil
}

View File

@@ -3,33 +3,24 @@ package main
import (
"context"
"log"
"time"
"strings"
bladeapiv1alpha1 "github.com/uptime-induestries/compute-blade-agent/api/bladeapi/v1alpha1"
"github.com/spf13/viper"
bladeapiv1alpha1 "github.com/uptime-industries/compute-blade-agent/api/bladeapi/v1alpha1"
)
// grpcClientContextKey is the unexported key type for values this package
// stores in a context.Context via context.WithValue.
type grpcClientContextKey int
const (
defaultGrpcClientContextKey grpcClientContextKey = 0
defaultGrpcClientConnContextKey grpcClientContextKey = 1
defaultGrpcClientContextKey grpcClientContextKey = 0
)
var (
grpcAddr string
timeout time.Duration
Version string
Commit string
Date string
)
func init() {
rootCmd.PersistentFlags().
StringVar(&grpcAddr, "addr", "unix:///tmp/compute-blade-agent.sock", "address of the compute-blade-agent gRPC server")
rootCmd.PersistentFlags().DurationVar(&timeout, "timeout", time.Minute, "timeout for gRPC requests")
}
// clientIntoContext returns a context derived from ctx that carries client
// under defaultGrpcClientContextKey.
func clientIntoContext(ctx context.Context, client bladeapiv1alpha1.BladeAgentServiceClient) context.Context {
	withClient := context.WithValue(ctx, defaultGrpcClientContextKey, client)
	return withClient
}
@@ -43,6 +34,13 @@ func clientFromContext(ctx context.Context) bladeapiv1alpha1.BladeAgentServiceCl
}
func main() {
// Setup configuration
viper.SetEnvKeyReplacer(strings.NewReplacer(".", "_"))
viper.AutomaticEnv()
viper.SetConfigName("config")
viper.SetConfigType("yaml")
viper.AddConfigPath("$HOME/.config/bladectl")
if err := rootCmd.Execute(); err != nil {
log.Fatal(err)
}