diff --git a/cmd/watchdog/main.go b/cmd/watchdog/main.go
index 3d11120..ed8c2da 100644
--- a/cmd/watchdog/main.go
+++ b/cmd/watchdog/main.go
@@ -11,8 +11,11 @@ import (
 )
 
 var (
-	cfgFile string
-	cfg     *config.Config
+	cfgFile   string
+	cfg       *config.Config
+	version   string
+	commit    string
+	buildDate string
 )
 
 var rootCmd = &cobra.Command{
@@ -84,7 +87,10 @@ func initConfig() {
 	}
 }
 
-func Main() {
+// Main stores the build metadata for later use by the health metrics and
+// runs the root command, exiting with status 1 if it returns an error.
+func Main(v, c, bd string) {
+	version, commit, buildDate = v, c, bd
 	if err := rootCmd.Execute(); err != nil {
 		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
 		os.Exit(1)
diff --git a/cmd/watchdog/root.go b/cmd/watchdog/root.go
index b933cea..8d2050f 100644
--- a/cmd/watchdog/root.go
+++ b/cmd/watchdog/root.go
@@ -17,6 +17,7 @@ import (
 	"notashelf.dev/watchdog/internal/aggregate"
 	"notashelf.dev/watchdog/internal/api"
 	"notashelf.dev/watchdog/internal/config"
+	"notashelf.dev/watchdog/internal/health"
 	"notashelf.dev/watchdog/internal/limits"
 	"notashelf.dev/watchdog/internal/normalize"
 )
@@ -55,6 +56,12 @@ func Run(cfg *config.Config) error {
 	metricsAgg.MustRegister(promRegistry)
 	promRegistry.MustRegister(blockedRequests)
 
+	// Register health and runtime metrics
+	healthCollector := health.NewCollector(version, commit, buildDate)
+	if err := healthCollector.Register(promRegistry); err != nil {
+		return fmt.Errorf("failed to register health metrics: %w", err)
+	}
+
 	// Create HTTP handlers
 	ingestionHandler := api.NewIngestionHandler(
 		cfg,
diff --git a/internal/health/metrics.go b/internal/health/metrics.go
new file mode 100644
index 0000000..a49c659
--- /dev/null
+++ b/internal/health/metrics.go
@@ -0,0 +1,64 @@
+package health
+
+import (
+	"fmt"
+	"time"
+
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/collectors"
+)
+
+// Collector holds the health and runtime metrics for the watchdog process.
+type Collector struct {
+	buildInfo prometheus.Gauge // constant 1; build metadata is carried in its labels
+	startTime prometheus.Gauge // Unix seconds captured when the Collector was created
+}
+
+// NewCollector creates a health metrics collector. The version, commit and
+// buildDate strings become constant labels on the watchdog_build_info gauge,
+// and watchdog_start_time_seconds is set to the current Unix time.
+func NewCollector(version, commit, buildDate string) *Collector {
+	buildInfo := prometheus.NewGauge(prometheus.GaugeOpts{
+		Name: "watchdog_build_info",
+		Help: "Build metadata for the running watchdog instance",
+		ConstLabels: prometheus.Labels{
+			"version":    version,
+			"commit":     commit,
+			"build_date": buildDate,
+		},
+	})
+	buildInfo.Set(1)
+
+	startTime := prometheus.NewGauge(prometheus.GaugeOpts{
+		Name: "watchdog_start_time_seconds",
+		Help: "Unix timestamp of when the watchdog process started",
+	})
+	startTime.Set(float64(time.Now().Unix()))
+
+	return &Collector{
+		buildInfo: buildInfo,
+		startTime: startTime,
+	}
+}
+
+// Register registers the build-info and start-time gauges plus the Go runtime
+// and process collectors on reg. It stops at the first failure and returns
+// that error wrapped with the name of the collector that could not be
+// registered, so the caller can tell which registration was rejected.
+func (c *Collector) Register(reg prometheus.Registerer) error {
+	targets := []struct {
+		name      string
+		collector prometheus.Collector
+	}{
+		{"build info", c.buildInfo},
+		{"start time", c.startTime},
+		{"go runtime", collectors.NewGoCollector()},
+		{"process", collectors.NewProcessCollector(collectors.ProcessCollectorOpts{})},
+	}
+	for _, t := range targets {
+		if err := reg.Register(t.collector); err != nil {
+			return fmt.Errorf("registering %s collector: %w", t.name, err)
+		}
+	}
+	return nil
+}
diff --git a/internal/health/metrics_test.go b/internal/health/metrics_test.go
new file mode 100644
index 0000000..7df5dbb
--- /dev/null
+++ b/internal/health/metrics_test.go
@@ -0,0 +1,124 @@
+package health
+
+import (
+	"errors"
+	"testing"
+
+	"github.com/prometheus/client_golang/prometheus"
+)
+
+func TestNewCollector_RegistersMetrics(t *testing.T) {
+	reg := prometheus.NewRegistry()
+	c := NewCollector("v0.1.0", "abc1234", "2026-03-02")
+
+	if err := c.Register(reg); err != nil {
+		t.Fatalf("Register failed: %v", err)
+	}
+
+	metrics, err := reg.Gather()
+	if err != nil {
+		t.Fatalf("Gather failed: %v", err)
+	}
+
+	// Should have at least the build info and start time metrics
+	names := make(map[string]bool)
+	for _, m := range metrics {
+		names[m.GetName()] = true
+	}
+
+	if !names["watchdog_build_info"] {
+		t.Error("expected watchdog_build_info metric")
+	}
+	if !names["watchdog_start_time_seconds"] {
+		t.Error("expected watchdog_start_time_seconds metric")
+	}
+}
+
+func TestNewCollector_BuildInfoLabels(t *testing.T) {
+	reg := prometheus.NewRegistry()
+	c := NewCollector("v1.2.3", "deadbeef", "2026-03-02")
+
+	if err := c.Register(reg); err != nil {
+		t.Fatalf("Register failed: %v", err)
+	}
+
+	metrics, err := reg.Gather()
+	if err != nil {
+		t.Fatalf("Gather failed: %v", err)
+	}
+
+	for _, m := range metrics {
+		if m.GetName() != "watchdog_build_info" {
+			continue
+		}
+
+		labels := make(map[string]string)
+		for _, l := range m.GetMetric()[0].GetLabel() {
+			labels[l.GetName()] = l.GetValue()
+		}
+
+		if labels["version"] != "v1.2.3" {
+			t.Errorf("expected version label %q, got %q", "v1.2.3", labels["version"])
+		}
+		if labels["commit"] != "deadbeef" {
+			t.Errorf("expected commit label %q, got %q", "deadbeef", labels["commit"])
+		}
+		if labels["build_date"] != "2026-03-02" {
+			t.Errorf(
+				"expected build_date label %q, got %q",
+				"2026-03-02",
+				labels["build_date"],
+			)
+		}
+		return
+	}
+
+	t.Error("watchdog_build_info metric not found in gathered metrics")
+}
+
+func TestNewCollector_StartTimeIsPositive(t *testing.T) {
+	reg := prometheus.NewRegistry()
+	c := NewCollector("v0.1.0", "abc1234", "2026-03-02")
+
+	if err := c.Register(reg); err != nil {
+		t.Fatalf("Register failed: %v", err)
+	}
+
+	metrics, err := reg.Gather()
+	if err != nil {
+		t.Fatalf("Gather failed: %v", err)
+	}
+
+	for _, m := range metrics {
+		if m.GetName() != "watchdog_start_time_seconds" {
+			continue
+		}
+		val := m.GetMetric()[0].GetGauge().GetValue()
+		if val <= 0 {
+			t.Errorf("expected positive start time, got %v", val)
+		}
+		return
+	}
+
+	t.Error("watchdog_start_time_seconds metric not found")
+}
+
+func TestCollector_RegisterDuplicateIsWrapped(t *testing.T) {
+	reg := prometheus.NewRegistry()
+	c := NewCollector("v0.1.0", "abc1234", "2026-03-02")
+
+	if err := c.Register(reg); err != nil {
+		t.Fatalf("first Register failed: %v", err)
+	}
+
+	err := c.Register(reg)
+	if err == nil {
+		t.Fatal("expected an error when registering the same collector twice")
+	}
+
+	// The wrapped error must still expose the underlying Prometheus error.
+	var already prometheus.AlreadyRegisteredError
+	if !errors.As(err, &already) {
+		t.Errorf("expected AlreadyRegisteredError in chain, got %v", err)
+	}
+}
diff --git a/main.go b/main.go
index ce24c8f..1f5d21b 100644
--- a/main.go
+++ b/main.go
@@ -2,6 +2,19 @@ package main
 
 import "notashelf.dev/watchdog/cmd/watchdog"
 
+// Injected at build time via ldflags:
+//
+//	-X main.Version=v1.0.0
+//	-X main.Commit=abc1234
+//	-X main.BuildDate=2026-03-02
+//
+// main forwards them to watchdog.Main so the rest of the program can use them.
+var (
+	Version   = "dev"
+	Commit    = "unknown"
+	BuildDate = "unknown"
+)
+
 func main() {
-	watchdog.Main()
+	watchdog.Main(Version, Commit, BuildDate)
 }