various: add internal health and runtime metrics

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: Iae1dcf8495a00159d588c6e2344312f36a6a6964
This commit is contained in:
raf 2026-03-02 21:38:43 +03:00
commit 27b3641717
Signed by: NotAShelf
GPG key ID: 29D95B64378DB4BF
5 changed files with 187 additions and 4 deletions

View file

@ -11,8 +11,11 @@ import (
)
var (
cfgFile string
cfg *config.Config
cfgFile string
cfg *config.Config
version string
commit string
buildDate string
)
var rootCmd = &cobra.Command{
@ -84,7 +87,8 @@ func initConfig() {
}
}
func Main() {
func Main(v, c, bd string) {
version, commit, buildDate = v, c, bd
if err := rootCmd.Execute(); err != nil {
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
os.Exit(1)

View file

@ -17,6 +17,7 @@ import (
"notashelf.dev/watchdog/internal/aggregate"
"notashelf.dev/watchdog/internal/api"
"notashelf.dev/watchdog/internal/config"
"notashelf.dev/watchdog/internal/health"
"notashelf.dev/watchdog/internal/limits"
"notashelf.dev/watchdog/internal/normalize"
)
@ -55,6 +56,12 @@ func Run(cfg *config.Config) error {
metricsAgg.MustRegister(promRegistry)
promRegistry.MustRegister(blockedRequests)
// Register health and runtime metrics
healthCollector := health.NewCollector(version, commit, buildDate)
if err := healthCollector.Register(promRegistry); err != nil {
return fmt.Errorf("failed to register health metrics: %w", err)
}
// Create HTTP handlers
ingestionHandler := api.NewIngestionHandler(
cfg,

View file

@ -0,0 +1,56 @@
package health
import (
	"fmt"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/collectors"
)
// Collector bundles the gauges that describe the running watchdog process:
// when it was started and which build it is.
type Collector struct {
	// startTime is the gauge exported as watchdog_start_time_seconds.
	startTime prometheus.Gauge
	// buildInfo is the gauge exported as watchdog_build_info.
	buildInfo prometheus.Gauge
}
// NewCollector returns a Collector whose gauges are already populated.
//
// version, commit and buildDate become the constant labels "version",
// "commit" and "build_date" of the watchdog_build_info gauge, which is fixed
// at 1. The watchdog_start_time_seconds gauge is stamped with the current Unix
// time (whole seconds) at the moment NewCollector is called.
//
// Nothing is registered here; call Register to expose the metrics.
func NewCollector(version, commit, buildDate string) *Collector {
	c := &Collector{
		buildInfo: prometheus.NewGauge(prometheus.GaugeOpts{
			Name: "watchdog_build_info",
			Help: "Build metadata for the running watchdog instance",
			ConstLabels: prometheus.Labels{
				"version":    version,
				"commit":     commit,
				"build_date": buildDate,
			},
		}),
		startTime: prometheus.NewGauge(prometheus.GaugeOpts{
			Name: "watchdog_start_time_seconds",
			Help: "Unix timestamp of when the watchdog process started",
		}),
	}

	// The info gauge only exists to carry its labels, so its value is a constant 1.
	c.buildInfo.Set(1)
	c.startTime.Set(float64(time.Now().Unix()))

	return c
}
// Register adds the build-info and start-time gauges, followed by the Go
// runtime and process collectors from client_golang, to reg.
//
// Collectors are registered in that order. On the first failure, every
// collector this call had already added is unregistered again so that reg is
// not left with a partial set and a later retry can succeed. The returned
// error names the collector that failed and wraps the underlying error, so
// callers can still inspect it with errors.Is / errors.As.
func (c *Collector) Register(reg prometheus.Registerer) error {
	steps := []struct {
		name      string
		collector prometheus.Collector
	}{
		{"build info", c.buildInfo},
		{"start time", c.startTime},
		{"go runtime", collectors.NewGoCollector()},
		{"process", collectors.NewProcessCollector(collectors.ProcessCollectorOpts{})},
	}

	for i, s := range steps {
		if err := reg.Register(s.collector); err != nil {
			// Roll back only what this call added; collectors that were
			// already present in reg before the call are left untouched.
			for _, done := range steps[:i] {
				reg.Unregister(done.collector)
			}
			return fmt.Errorf("registering %s collector: %w", s.name, err)
		}
	}

	return nil
}

View file

@ -0,0 +1,103 @@
package health
import (
"testing"
"github.com/prometheus/client_golang/prometheus"
)
// TestNewCollector_RegistersMetrics checks that registering a collector on a
// fresh registry exposes both watchdog gauges by name.
func TestNewCollector_RegistersMetrics(t *testing.T) {
	registry := prometheus.NewRegistry()
	if err := NewCollector("v0.1.0", "abc1234", "2026-03-02").Register(registry); err != nil {
		t.Fatalf("Register failed: %v", err)
	}

	families, err := registry.Gather()
	if err != nil {
		t.Fatalf("Gather failed: %v", err)
	}

	// Index the gathered metric families by name for the lookups below.
	seen := make(map[string]struct{}, len(families))
	for _, fam := range families {
		seen[fam.GetName()] = struct{}{}
	}

	// At minimum the build info and start time gauges must be present.
	if _, ok := seen["watchdog_build_info"]; !ok {
		t.Error("expected watchdog_build_info metric")
	}
	if _, ok := seen["watchdog_start_time_seconds"]; !ok {
		t.Error("expected watchdog_start_time_seconds metric")
	}
}
// TestNewCollector_BuildInfoLabels checks that the values passed to
// NewCollector come back as the version, commit and build_date labels of the
// watchdog_build_info metric.
func TestNewCollector_BuildInfoLabels(t *testing.T) {
	registry := prometheus.NewRegistry()
	if err := NewCollector("v1.2.3", "deadbeef", "2026-03-02").Register(registry); err != nil {
		t.Fatalf("Register failed: %v", err)
	}

	families, err := registry.Gather()
	if err != nil {
		t.Fatalf("Gather failed: %v", err)
	}

	// Locate the build info family first; everything else gathered is irrelevant here.
	idx := -1
	for i, fam := range families {
		if fam.GetName() == "watchdog_build_info" {
			idx = i
			break
		}
	}
	if idx < 0 {
		t.Error("watchdog_build_info metric not found in gathered metrics")
		return
	}

	// Flatten the label pairs of the first sample into a map.
	got := make(map[string]string)
	for _, pair := range families[idx].GetMetric()[0].GetLabel() {
		got[pair.GetName()] = pair.GetValue()
	}

	if v := got["version"]; v != "v1.2.3" {
		t.Errorf("expected version label %q, got %q", "v1.2.3", v)
	}
	if v := got["commit"]; v != "deadbeef" {
		t.Errorf("expected commit label %q, got %q", "deadbeef", v)
	}
	if v := got["build_date"]; v != "2026-03-02" {
		t.Errorf(
			"expected build_date label %q, got %q",
			"2026-03-02",
			v,
		)
	}
}
// TestNewCollector_StartTimeIsPositive checks that the
// watchdog_start_time_seconds gauge is gathered with a value greater than zero.
func TestNewCollector_StartTimeIsPositive(t *testing.T) {
	registry := prometheus.NewRegistry()
	if err := NewCollector("v0.1.0", "abc1234", "2026-03-02").Register(registry); err != nil {
		t.Fatalf("Register failed: %v", err)
	}

	families, err := registry.Gather()
	if err != nil {
		t.Fatalf("Gather failed: %v", err)
	}

	// Find the start time family among everything the registry gathered.
	idx := -1
	for i, fam := range families {
		if fam.GetName() == "watchdog_start_time_seconds" {
			idx = i
			break
		}
	}
	if idx < 0 {
		t.Error("watchdog_start_time_seconds metric not found")
		return
	}

	if started := families[idx].GetMetric()[0].GetGauge().GetValue(); started <= 0 {
		t.Errorf("expected positive start time, got %v", started)
	}
}

15
main.go
View file

@ -2,6 +2,19 @@ package main
import "notashelf.dev/watchdog/cmd/watchdog"
// Build metadata handed to watchdog.Main. The values below are only the
// defaults; they are meant to be overridden at link time via ldflags:
//
//	-X main.Version=v1.0.0
//	-X main.Commit=abc1234
//	-X main.BuildDate=2026-03-02

// Version is the version string of this build; "dev" unless injected.
var Version = "dev"

// Commit is the source revision of this build; "unknown" unless injected.
var Commit = "unknown"

// BuildDate is the date of this build; "unknown" unless injected.
var BuildDate = "unknown"
func main() {
watchdog.Main()
watchdog.Main(Version, Commit, BuildDate)
}