various: add internal health and runtime metrics
Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: Iae1dcf8495a00159d588c6e2344312f36a6a6964
This commit is contained in:
parent
d7cdf2cc49
commit
27b3641717
5 changed files with 187 additions and 4 deletions
56
internal/health/metrics.go
Normal file
56
internal/health/metrics.go
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
package health
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/collectors"
|
||||
)
|
||||
|
||||
// Holds health and runtime metrics for the watchdog process
|
||||
type Collector struct {
|
||||
buildInfo prometheus.Gauge
|
||||
startTime prometheus.Gauge
|
||||
}
|
||||
|
||||
// Creates a health metrics collector with build metadata
|
||||
func NewCollector(version, commit, buildDate string) *Collector {
|
||||
buildInfo := prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Name: "watchdog_build_info",
|
||||
Help: "Build metadata for the running watchdog instance",
|
||||
ConstLabels: prometheus.Labels{
|
||||
"version": version,
|
||||
"commit": commit,
|
||||
"build_date": buildDate,
|
||||
},
|
||||
})
|
||||
buildInfo.Set(1)
|
||||
|
||||
startTime := prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Name: "watchdog_start_time_seconds",
|
||||
Help: "Unix timestamp of when the watchdog process started",
|
||||
})
|
||||
startTime.Set(float64(time.Now().Unix()))
|
||||
|
||||
return &Collector{
|
||||
buildInfo: buildInfo,
|
||||
startTime: startTime,
|
||||
}
|
||||
}
|
||||
|
||||
// Registers all health metrics plus Go runtime collectors
|
||||
func (c *Collector) Register(reg prometheus.Registerer) error {
|
||||
if err := reg.Register(c.buildInfo); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := reg.Register(c.startTime); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := reg.Register(collectors.NewGoCollector()); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := reg.Register(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{})); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
103
internal/health/metrics_test.go
Normal file
103
internal/health/metrics_test.go
Normal file
|
|
@ -0,0 +1,103 @@
|
|||
package health
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
func TestNewCollector_RegistersMetrics(t *testing.T) {
|
||||
reg := prometheus.NewRegistry()
|
||||
c := NewCollector("v0.1.0", "abc1234", "2026-03-02")
|
||||
|
||||
if err := c.Register(reg); err != nil {
|
||||
t.Fatalf("Register failed: %v", err)
|
||||
}
|
||||
|
||||
metrics, err := reg.Gather()
|
||||
if err != nil {
|
||||
t.Fatalf("Gather failed: %v", err)
|
||||
}
|
||||
|
||||
// Should have at least build_info and uptime
|
||||
names := make(map[string]bool)
|
||||
for _, m := range metrics {
|
||||
names[m.GetName()] = true
|
||||
}
|
||||
|
||||
if !names["watchdog_build_info"] {
|
||||
t.Error("expected watchdog_build_info metric")
|
||||
}
|
||||
if !names["watchdog_start_time_seconds"] {
|
||||
t.Error("expected watchdog_start_time_seconds metric")
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewCollector_BuildInfoLabels(t *testing.T) {
|
||||
reg := prometheus.NewRegistry()
|
||||
c := NewCollector("v1.2.3", "deadbeef", "2026-03-02")
|
||||
|
||||
if err := c.Register(reg); err != nil {
|
||||
t.Fatalf("Register failed: %v", err)
|
||||
}
|
||||
|
||||
metrics, err := reg.Gather()
|
||||
if err != nil {
|
||||
t.Fatalf("Gather failed: %v", err)
|
||||
}
|
||||
|
||||
for _, m := range metrics {
|
||||
if m.GetName() != "watchdog_build_info" {
|
||||
continue
|
||||
}
|
||||
|
||||
labels := make(map[string]string)
|
||||
for _, l := range m.GetMetric()[0].GetLabel() {
|
||||
labels[l.GetName()] = l.GetValue()
|
||||
}
|
||||
|
||||
if labels["version"] != "v1.2.3" {
|
||||
t.Errorf("expected version label %q, got %q", "v1.2.3", labels["version"])
|
||||
}
|
||||
if labels["commit"] != "deadbeef" {
|
||||
t.Errorf("expected commit label %q, got %q", "deadbeef", labels["commit"])
|
||||
}
|
||||
if labels["build_date"] != "2026-03-02" {
|
||||
t.Errorf(
|
||||
"expected build_date label %q, got %q",
|
||||
"2026-03-02",
|
||||
labels["build_date"],
|
||||
)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
t.Error("watchdog_build_info metric not found in gathered metrics")
|
||||
}
|
||||
|
||||
func TestNewCollector_StartTimeIsPositive(t *testing.T) {
|
||||
reg := prometheus.NewRegistry()
|
||||
c := NewCollector("v0.1.0", "abc1234", "2026-03-02")
|
||||
|
||||
if err := c.Register(reg); err != nil {
|
||||
t.Fatalf("Register failed: %v", err)
|
||||
}
|
||||
|
||||
metrics, err := reg.Gather()
|
||||
if err != nil {
|
||||
t.Fatalf("Gather failed: %v", err)
|
||||
}
|
||||
|
||||
for _, m := range metrics {
|
||||
if m.GetName() != "watchdog_start_time_seconds" {
|
||||
continue
|
||||
}
|
||||
val := m.GetMetric()[0].GetGauge().GetValue()
|
||||
if val <= 0 {
|
||||
t.Errorf("expected positive start time, got %v", val)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
t.Error("watchdog_start_time_seconds metric not found")
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue