Files
compute-blade-agent/pkg/agent/state.go
Cedric Kienzler 7ec49ce05c feat(OpenTelemetry): Integrate OpenTelemetry into agent (#90)
* feat(OpenTelemetry): Integrate OpenTelemetry into agent

- integrate OpenTelemetry logging with zap logger for better observability
- add OpenTelemetry gRPC middleware for enhanced tracing capabilities
- document new OTLP exporter endpoint for better configuration guidance

* docs: document OTEL env var

---------

Co-authored-by: Cedric Kienzler <cedric@specht-labs.de>
2025-06-06 22:43:37 +02:00

117 lines
2.8 KiB
Go

package agent
import (
"context"
"sync"
"github.com/compute-blade-community/compute-blade-agent/pkg/events"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/spechtlabs/go-otel-utils/otelzap"
"go.uber.org/zap"
)
var (
stateMetric = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: "computeblade_state",
Name: "state",
Help: "ComputeBlade state (label values are critical, identify, normal)",
}, []string{"state"})
)
type ComputebladeState interface {
RegisterEvent(event events.Event)
IdentifyActive() bool
WaitForIdentifyConfirm(ctx context.Context) error
CriticalActive() bool
WaitForCriticalClear(ctx context.Context) error
}
type computebladeStateImpl struct {
mutex sync.Mutex
// identifyActive indicates whether the blade is currently in identify mode
identifyActive bool
identifyConfirmChan chan struct{}
// criticalActive indicates whether the blade is currently in critical mode
criticalActive bool
criticalConfirmChan chan struct{}
}
func NewComputeBladeState() ComputebladeState {
return &computebladeStateImpl{
identifyConfirmChan: make(chan struct{}),
criticalConfirmChan: make(chan struct{}),
}
}
func (s *computebladeStateImpl) RegisterEvent(event events.Event) {
s.mutex.Lock()
defer s.mutex.Unlock()
switch event {
case events.IdentifyEvent:
s.identifyActive = true
case events.IdentifyConfirmEvent:
s.identifyActive = false
close(s.identifyConfirmChan)
s.identifyConfirmChan = make(chan struct{})
case events.CriticalEvent:
s.criticalActive = true
s.identifyActive = false
case events.CriticalResetEvent:
s.criticalActive = false
close(s.criticalConfirmChan)
s.criticalConfirmChan = make(chan struct{})
default:
otelzap.L().Warn("Unknown event", zap.String("event", event.String()))
}
// Set identify state metric
if s.identifyActive {
stateMetric.WithLabelValues("identify").Set(1)
} else {
stateMetric.WithLabelValues("identify").Set(0)
}
// Set critical state metric
if s.criticalActive {
stateMetric.WithLabelValues("critical").Set(1)
} else {
stateMetric.WithLabelValues("critical").Set(0)
}
// Set critical state metric
if !s.criticalActive && !s.identifyActive {
stateMetric.WithLabelValues("normal").Set(1)
} else {
stateMetric.WithLabelValues("normal").Set(0)
}
}
func (s *computebladeStateImpl) IdentifyActive() bool {
return s.identifyActive
}
func (s *computebladeStateImpl) WaitForIdentifyConfirm(ctx context.Context) error {
select {
case <-ctx.Done():
return ctx.Err()
case <-s.identifyConfirmChan:
return nil
}
}
func (s *computebladeStateImpl) CriticalActive() bool {
return s.criticalActive
}
func (s *computebladeStateImpl) WaitForCriticalClear(ctx context.Context) error {
select {
case <-ctx.Done():
return ctx.Err()
case <-s.criticalConfirmChan:
return nil
}
}