From 7b06c4f2ca7c410a9a0ff334c7c77b7884bc62e6 Mon Sep 17 00:00:00 2001 From: NotAShelf Date: Sun, 1 Mar 2026 21:14:07 +0300 Subject: [PATCH 01/10] various: extract magic numbers into named constants Signed-off-by: NotAShelf Change-Id: I854b2f9b5f39e4629c32e5681e6322826a6a6964 --- cmd/watchdog/root.go | 12 ++++++------ internal/aggregate/metrics.go | 5 +++-- internal/limits/constants.go | 11 +++++++++++ internal/normalize/path.go | 3 ++- 4 files changed, 22 insertions(+), 9 deletions(-) diff --git a/cmd/watchdog/root.go b/cmd/watchdog/root.go index e64e158..9ab1df0 100644 --- a/cmd/watchdog/root.go +++ b/cmd/watchdog/root.go @@ -11,13 +11,13 @@ import ( "path/filepath" "strings" "syscall" - "time" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" "notashelf.dev/watchdog/internal/aggregate" "notashelf.dev/watchdog/internal/api" "notashelf.dev/watchdog/internal/config" + "notashelf.dev/watchdog/internal/limits" "notashelf.dev/watchdog/internal/normalize" ) @@ -91,9 +91,9 @@ func Run(cfg *config.Config) error { srv := &http.Server{ Addr: cfg.Server.ListenAddr, Handler: mux, - ReadTimeout: 10 * time.Second, - WriteTimeout: 10 * time.Second, - IdleTimeout: 60 * time.Second, + ReadTimeout: limits.HTTPReadTimeout, + WriteTimeout: limits.HTTPWriteTimeout, + IdleTimeout: limits.HTTPIdleTimeout, } // Start server in goroutine @@ -115,8 +115,8 @@ func Run(cfg *config.Config) error { case sig := <-shutdown: log.Printf("Received signal: %v, starting graceful shutdown", sig) - // Give outstanding requests 30 seconds to complete - ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + // Give outstanding requests time to complete + ctx, cancel := context.WithTimeout(context.Background(), limits.ShutdownTimeout) defer cancel() // Shutdown metrics aggregator. 
diff --git a/internal/aggregate/metrics.go b/internal/aggregate/metrics.go index a9ba3ad..055896e 100644 --- a/internal/aggregate/metrics.go +++ b/internal/aggregate/metrics.go @@ -9,6 +9,7 @@ import ( "github.com/prometheus/client_golang/prometheus" "notashelf.dev/watchdog/internal/config" + "notashelf.dev/watchdog/internal/limits" ) var prometheusLabelPattern = regexp.MustCompile(`^[a-zA-Z0-9_/:.-]*$`) @@ -121,11 +122,11 @@ func NewMetricsAggregator( return m } -// Background goroutine to update the unique visitors gauge every 10 seconds +// Background goroutine to update the unique visitors gauge periodically // instead of on every request. This should help with performance. func (m *MetricsAggregator) updateUniquesGauge() { defer m.wg.Done() - ticker := time.NewTicker(10 * time.Second) + ticker := time.NewTicker(limits.UniquesUpdatePeriod) defer ticker.Stop() for { diff --git a/internal/limits/constants.go b/internal/limits/constants.go index 14985a9..2db5dc1 100644 --- a/internal/limits/constants.go +++ b/internal/limits/constants.go @@ -1,5 +1,7 @@ package limits +import "time" + // Size limits for request processing const ( MaxEventSize = 4 * 1024 // 4KB max event payload @@ -7,3 +9,12 @@ const ( MaxRefLen = 2048 // max referrer length MaxWidth = 10000 // max reasonable screen width ) + +// Timeout constants +const ( + HTTPReadTimeout = 10 * time.Second // HTTP server read timeout + HTTPWriteTimeout = 10 * time.Second // HTTP server write timeout + HTTPIdleTimeout = 60 * time.Second // HTTP server idle timeout + ShutdownTimeout = 30 * time.Second // graceful shutdown timeout + UniquesUpdatePeriod = 10 * time.Second // HLL gauge update interval +) diff --git a/internal/normalize/path.go b/internal/normalize/path.go index 2148f34..9836b67 100644 --- a/internal/normalize/path.go +++ b/internal/normalize/path.go @@ -4,6 +4,7 @@ import ( "strings" "notashelf.dev/watchdog/internal/config" + "notashelf.dev/watchdog/internal/limits" ) type PathNormalizer struct { @@ 
-14,7 +15,7 @@ type PathNormalizer struct { func NewPathNormalizer(cfg config.PathConfig) *PathNormalizer { return &PathNormalizer{ cfg: cfg, - maxLength: 2048, + maxLength: limits.MaxPathLen, } } From 896ec1a40a404796004b5ad3847eceda39069e53 Mon Sep 17 00:00:00 2001 From: NotAShelf Date: Sun, 1 Mar 2026 21:21:20 +0300 Subject: [PATCH 02/10] watchdog: add metrics for blocked requests & logging Signed-off-by: NotAShelf Change-Id: Ib1d876859422a6115772962ed9e207a46a6a6964 --- cmd/watchdog/root.go | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/cmd/watchdog/root.go b/cmd/watchdog/root.go index 9ab1df0..b933cea 100644 --- a/cmd/watchdog/root.go +++ b/cmd/watchdog/root.go @@ -31,6 +31,15 @@ func Run(cfg *config.Config) error { eventRegistry := aggregate.NewCustomEventRegistry(cfg.Limits.MaxCustomEvents) metricsAgg := aggregate.NewMetricsAggregator(pathRegistry, eventRegistry, cfg) + // Metric for tracking blocked file requests (scrapers/bots) + blockedRequests := prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "web_blocked_requests_total", + Help: "File server requests blocked by security filters", + }, + []string{"reason"}, + ) + // Load HLL state from previous run if it exists if cfg.Site.SaltRotation != "" { log.Println("HLL state persistence enabled") @@ -44,6 +53,7 @@ func Run(cfg *config.Config) error { // Register Prometheus metrics promRegistry := prometheus.NewRegistry() metricsAgg.MustRegister(promRegistry) + promRegistry.MustRegister(blockedRequests) // Create HTTP handlers ingestionHandler := api.NewIngestionHandler( @@ -84,7 +94,7 @@ func Run(cfg *config.Config) error { // Serve whitelisted static files from /web/ if the directory exists if info, err := os.Stat("web"); err == nil && info.IsDir() { log.Println("Serving static files from /web/") - mux.Handle("/web/", safeFileServer("web")) + mux.Handle("/web/", safeFileServer("web", blockedRequests)) } // Create HTTP server with timeouts @@ -153,7 +163,7 
@@ func basicAuth(next http.Handler, username, password string) http.Handler { // Creates a file server that only serves whitelisted files. Blocks dotfiles, .git, .env, etc. // TODO: I need to hook this up to eris somehow so I can just forward the paths that are being // scanned despite not being on a whitelist. Would be a good way of detecting scrapers, maybe. -func safeFileServer(root string) http.Handler { +func safeFileServer(root string, blockedRequests *prometheus.CounterVec) http.Handler { fs := http.FileServer(http.Dir(root)) return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { // Clean the path @@ -161,6 +171,8 @@ func safeFileServer(root string) http.Handler { // Block directory listings if strings.HasSuffix(path, "/") { + blockedRequests.WithLabelValues("directory_listing").Inc() + log.Printf("Blocked directory listing attempt: %s from %s", path, r.RemoteAddr) http.NotFound(w, r) return } @@ -168,6 +180,8 @@ func safeFileServer(root string) http.Handler { // Block dotfiles and sensitive files for segment := range strings.SplitSeq(path, "/") { if strings.HasPrefix(segment, ".") { + blockedRequests.WithLabelValues("dotfile").Inc() + log.Printf("Blocked dotfile access: %s from %s", path, r.RemoteAddr) http.NotFound(w, r) return } @@ -177,6 +191,8 @@ func safeFileServer(root string) http.Handler { strings.Contains(lower, "config") || strings.HasSuffix(lower, ".bak") || strings.HasSuffix(lower, "~") { + blockedRequests.WithLabelValues("sensitive_file").Inc() + log.Printf("Blocked sensitive file access: %s from %s", path, r.RemoteAddr) http.NotFound(w, r) return } @@ -185,6 +201,8 @@ func safeFileServer(root string) http.Handler { // Only serve .js, .html, .css files ext := strings.ToLower(filepath.Ext(path)) if ext != ".js" && ext != ".html" && ext != ".css" { + blockedRequests.WithLabelValues("invalid_extension").Inc() + log.Printf("Blocked invalid extension: %s from %s", path, r.RemoteAddr) http.NotFound(w, r) return } From 
6977a501b16e4cdfa89571cebc1d5134dfec0843 Mon Sep 17 00:00:00 2001 From: NotAShelf Date: Mon, 2 Mar 2026 21:27:47 +0300 Subject: [PATCH 03/10] internal: better device classification via UA parsing Signed-off-by: NotAShelf Change-Id: I6c78f1eebe71ef4cf037ebbda2caaeb36a6a6964 --- internal/api/handler.go | 52 +++++++++++---- internal/api/handler_test.go | 123 ++++++++++++++++++++++++++--------- 2 files changed, 130 insertions(+), 45 deletions(-) diff --git a/internal/api/handler.go b/internal/api/handler.go index bed0abf..4cb3905 100644 --- a/internal/api/handler.go +++ b/internal/api/handler.go @@ -137,7 +137,7 @@ func (h *IngestionHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { // Device classification if h.cfg.Site.Collect.Device { - device = h.classifyDevice(event.Width) + device = h.classifyDevice(event.Width, userAgent) } // Referrer classification @@ -271,19 +271,43 @@ func (h *IngestionHandler) ipInCIDR(ip, cidr string) bool { return network.Contains(testIP) } -// Classifies screen width into device categories using configured breakpoints -// FIXME: we need a more robust mechanism for classifying devices. Breakpoints -// are the only ones I can think of *right now* but I'm positive there are better -// mechanisns. We'll get to this later. 
-func (h *IngestionHandler) classifyDevice(width int) string { - if width == 0 { - return "unknown" - } - if width < h.cfg.Limits.DeviceBreakpoints.Mobile { - return "mobile" - } - if width < h.cfg.Limits.DeviceBreakpoints.Tablet { +// Classifies device using both screen width and User-Agent parsing +// Uses UA hints for better detection, falls back to width breakpoints +func (h *IngestionHandler) classifyDevice(width int, userAgent string) string { + // First try User-Agent based detection for better accuracy + ua := strings.ToLower(userAgent) + + // Tablet detection via UA (must come before mobile: Android tablets lack "mobile" keyword) + if strings.Contains(ua, "tablet") || + strings.Contains(ua, "ipad") || + (strings.Contains(ua, "android") && !strings.Contains(ua, "mobile")) { return "tablet" } - return "desktop" + + // Mobile detection via UA + if strings.Contains(ua, "mobile") || + strings.Contains(ua, "iphone") || + strings.Contains(ua, "ipod") || + strings.Contains(ua, "windows phone") || + strings.Contains(ua, "blackberry") { + return "mobile" + } + + // If UA doesn't provide clear signal, use width breakpoints + if width > 0 { + if width < h.cfg.Limits.DeviceBreakpoints.Mobile { + return "mobile" + } + if width < h.cfg.Limits.DeviceBreakpoints.Tablet { + return "tablet" + } + return "desktop" + } + + // Default to desktop if UA suggests desktop browser + if userAgent != "" { + return "desktop" + } + + return "unknown" } diff --git a/internal/api/handler_test.go b/internal/api/handler_test.go index cb788d9..cee1dbb 100644 --- a/internal/api/handler_test.go +++ b/internal/api/handler_test.go @@ -2,7 +2,6 @@ package api import ( "bytes" - "fmt" "net/http" "net/http/httptest" "testing" @@ -207,51 +206,113 @@ func TestIngestionHandler_InvalidJSON(t *testing.T) { } } -func TestIngestionHandler_DeviceClassification(t *testing.T) { - cfg := config.Config{ - Site: config.SiteConfig{ - Domains: []string{"example.com"}, - Collect: config.CollectConfig{ - Pageviews: 
true, - Device: true, - }, - Path: config.PathConfig{}, - }, - Limits: config.LimitsConfig{ - MaxPaths: 100, - MaxSources: 50, - }, - } - +func newTestHandler(cfg *config.Config) *IngestionHandler { pathNorm := normalize.NewPathNormalizer(cfg.Site.Path) pathRegistry := aggregate.NewPathRegistry(cfg.Limits.MaxPaths) refRegistry := normalize.NewReferrerRegistry(cfg.Limits.MaxSources) metricsAgg := aggregate.NewMetricsAggregator( pathRegistry, aggregate.NewCustomEventRegistry(100), - &cfg, + cfg, ) + return NewIngestionHandler(cfg, pathNorm, pathRegistry, refRegistry, metricsAgg) +} - handler := NewIngestionHandler(&cfg, pathNorm, pathRegistry, refRegistry, metricsAgg) +func TestClassifyDevice_UA(t *testing.T) { + cfg := &config.Config{ + Limits: config.LimitsConfig{ + DeviceBreakpoints: config.DeviceBreaks{ + Mobile: 768, + Tablet: 1024, + }, + }, + } + h := newTestHandler(cfg) tests := []struct { - name string - width int + name string + width int + userAgent string + want string }{ - {"mobile", 375}, - {"tablet", 768}, - {"desktop", 1920}, + // UA takes priority + { + name: "iphone via UA", + width: 390, + userAgent: "Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15", + want: "mobile", + }, + { + name: "android phone via UA", + width: 0, + userAgent: "Mozilla/5.0 (Linux; Android 13; Pixel 7) Mobile Safari/537.36", + want: "mobile", + }, + { + name: "windows phone via UA", + width: 0, + userAgent: "Mozilla/5.0 (compatible; MSIE 10.0; Windows Phone 8.0)", + want: "mobile", + }, + { + name: "ipad via UA", + width: 1024, + userAgent: "Mozilla/5.0 (iPad; CPU OS 17_0 like Mac OS X) AppleWebKit/605.1.15", + want: "tablet", + }, + { + name: "android tablet via UA (no mobile keyword)", + width: 0, + userAgent: "Mozilla/5.0 (Linux; Android 13; SM-T870) AppleWebKit/537.36", + want: "tablet", + }, + // Falls back to width when UA is desktop + { + name: "desktop UA wide screen", + width: 1920, + userAgent: "Mozilla/5.0 (X11; Linux x86_64) 
AppleWebKit/537.36 Chrome/120.0", + want: "desktop", + }, + { + name: "desktop UA narrow width", + width: 500, + userAgent: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 Chrome/120.0", + want: "mobile", + }, + // Width-only fallback + { + name: "no UA mobile width", + width: 375, + userAgent: "", + want: "mobile", + }, + { + name: "no UA tablet width", + width: 800, + userAgent: "", + want: "tablet", + }, + { + name: "no UA desktop width", + width: 1440, + userAgent: "", + want: "desktop", + }, + // Unknown + { + name: "no UA no width", + width: 0, + userAgent: "", + want: "unknown", + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - body := fmt.Sprintf(`{"d":"example.com","p":"/test","w":%d}`, tt.width) - req := httptest.NewRequest("POST", "/api/event", bytes.NewBufferString(body)) - w := httptest.NewRecorder() - handler.ServeHTTP(w, req) - - if w.Code != http.StatusNoContent { - t.Errorf("expected status %d, got %d", http.StatusNoContent, w.Code) + got := h.classifyDevice(tt.width, tt.userAgent) + if got != tt.want { + t.Errorf("classifyDevice(%d, %q) = %q, want %q", + tt.width, tt.userAgent, got, tt.want) } }) } From d7cdf2cc49fdd0dad0d0f10ac8519ccdf5157b5e Mon Sep 17 00:00:00 2001 From: NotAShelf Date: Mon, 2 Mar 2026 21:27:59 +0300 Subject: [PATCH 04/10] chore: fix typo in `dailySalt` comment Signed-off-by: NotAShelf Change-Id: I8f0d0bf4bc597f0aecfd98c292f38cdb6a6a6964 --- internal/aggregate/uniques.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/aggregate/uniques.go b/internal/aggregate/uniques.go index 1c7b6de..8d8149f 100644 --- a/internal/aggregate/uniques.go +++ b/internal/aggregate/uniques.go @@ -53,7 +53,7 @@ func (u *UniquesEstimator) Estimate() uint64 { return u.hll.Estimate() } -// Cenerates a deterministic salt based on the current date +// Generates a deterministic salt based on the current date // Same day = same salt, different day = different salt func dailySalt(t time.Time) string { // 
Use UTC to ensure consistent rotation regardless of timezone From 27b3641717399aca8775b807ee7527c4fc065c78 Mon Sep 17 00:00:00 2001 From: NotAShelf Date: Mon, 2 Mar 2026 21:38:43 +0300 Subject: [PATCH 05/10] various: add internal health and runtime metrics Signed-off-by: NotAShelf Change-Id: Iae1dcf8495a00159d588c6e2344312f36a6a6964 --- cmd/watchdog/main.go | 10 +++- cmd/watchdog/root.go | 7 +++ internal/health/metrics.go | 56 +++++++++++++++++ internal/health/metrics_test.go | 103 ++++++++++++++++++++++++++++++++ main.go | 15 ++++- 5 files changed, 187 insertions(+), 4 deletions(-) create mode 100644 internal/health/metrics.go create mode 100644 internal/health/metrics_test.go diff --git a/cmd/watchdog/main.go b/cmd/watchdog/main.go index 3d11120..ed8c2da 100644 --- a/cmd/watchdog/main.go +++ b/cmd/watchdog/main.go @@ -11,8 +11,11 @@ import ( ) var ( - cfgFile string - cfg *config.Config + cfgFile string + cfg *config.Config + version string + commit string + buildDate string ) var rootCmd = &cobra.Command{ @@ -84,7 +87,8 @@ func initConfig() { } } -func Main() { +func Main(v, c, bd string) { + version, commit, buildDate = v, c, bd if err := rootCmd.Execute(); err != nil { fmt.Fprintf(os.Stderr, "Error: %v\n", err) os.Exit(1) diff --git a/cmd/watchdog/root.go b/cmd/watchdog/root.go index b933cea..8d2050f 100644 --- a/cmd/watchdog/root.go +++ b/cmd/watchdog/root.go @@ -17,6 +17,7 @@ import ( "notashelf.dev/watchdog/internal/aggregate" "notashelf.dev/watchdog/internal/api" "notashelf.dev/watchdog/internal/config" + "notashelf.dev/watchdog/internal/health" "notashelf.dev/watchdog/internal/limits" "notashelf.dev/watchdog/internal/normalize" ) @@ -55,6 +56,12 @@ func Run(cfg *config.Config) error { metricsAgg.MustRegister(promRegistry) promRegistry.MustRegister(blockedRequests) + // Register health and runtime metrics + healthCollector := health.NewCollector(version, commit, buildDate) + if err := healthCollector.Register(promRegistry); err != nil { + return 
fmt.Errorf("failed to register health metrics: %w", err) + } + // Create HTTP handlers ingestionHandler := api.NewIngestionHandler( cfg, diff --git a/internal/health/metrics.go b/internal/health/metrics.go new file mode 100644 index 0000000..a49c659 --- /dev/null +++ b/internal/health/metrics.go @@ -0,0 +1,56 @@ +package health + +import ( + "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/collectors" +) + +// Holds health and runtime metrics for the watchdog process +type Collector struct { + buildInfo prometheus.Gauge + startTime prometheus.Gauge +} + +// Creates a health metrics collector with build metadata +func NewCollector(version, commit, buildDate string) *Collector { + buildInfo := prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "watchdog_build_info", + Help: "Build metadata for the running watchdog instance", + ConstLabels: prometheus.Labels{ + "version": version, + "commit": commit, + "build_date": buildDate, + }, + }) + buildInfo.Set(1) + + startTime := prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "watchdog_start_time_seconds", + Help: "Unix timestamp of when the watchdog process started", + }) + startTime.Set(float64(time.Now().Unix())) + + return &Collector{ + buildInfo: buildInfo, + startTime: startTime, + } +} + +// Registers all health metrics plus Go runtime collectors +func (c *Collector) Register(reg prometheus.Registerer) error { + if err := reg.Register(c.buildInfo); err != nil { + return err + } + if err := reg.Register(c.startTime); err != nil { + return err + } + if err := reg.Register(collectors.NewGoCollector()); err != nil { + return err + } + if err := reg.Register(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{})); err != nil { + return err + } + return nil +} diff --git a/internal/health/metrics_test.go b/internal/health/metrics_test.go new file mode 100644 index 0000000..7df5dbb --- /dev/null +++ b/internal/health/metrics_test.go @@ -0,0 +1,103 @@ 
+package health + +import ( + "testing" + + "github.com/prometheus/client_golang/prometheus" +) + +func TestNewCollector_RegistersMetrics(t *testing.T) { + reg := prometheus.NewRegistry() + c := NewCollector("v0.1.0", "abc1234", "2026-03-02") + + if err := c.Register(reg); err != nil { + t.Fatalf("Register failed: %v", err) + } + + metrics, err := reg.Gather() + if err != nil { + t.Fatalf("Gather failed: %v", err) + } + + // Should have at least build_info and uptime + names := make(map[string]bool) + for _, m := range metrics { + names[m.GetName()] = true + } + + if !names["watchdog_build_info"] { + t.Error("expected watchdog_build_info metric") + } + if !names["watchdog_start_time_seconds"] { + t.Error("expected watchdog_start_time_seconds metric") + } +} + +func TestNewCollector_BuildInfoLabels(t *testing.T) { + reg := prometheus.NewRegistry() + c := NewCollector("v1.2.3", "deadbeef", "2026-03-02") + + if err := c.Register(reg); err != nil { + t.Fatalf("Register failed: %v", err) + } + + metrics, err := reg.Gather() + if err != nil { + t.Fatalf("Gather failed: %v", err) + } + + for _, m := range metrics { + if m.GetName() != "watchdog_build_info" { + continue + } + + labels := make(map[string]string) + for _, l := range m.GetMetric()[0].GetLabel() { + labels[l.GetName()] = l.GetValue() + } + + if labels["version"] != "v1.2.3" { + t.Errorf("expected version label %q, got %q", "v1.2.3", labels["version"]) + } + if labels["commit"] != "deadbeef" { + t.Errorf("expected commit label %q, got %q", "deadbeef", labels["commit"]) + } + if labels["build_date"] != "2026-03-02" { + t.Errorf( + "expected build_date label %q, got %q", + "2026-03-02", + labels["build_date"], + ) + } + return + } + + t.Error("watchdog_build_info metric not found in gathered metrics") +} + +func TestNewCollector_StartTimeIsPositive(t *testing.T) { + reg := prometheus.NewRegistry() + c := NewCollector("v0.1.0", "abc1234", "2026-03-02") + + if err := c.Register(reg); err != nil { + 
t.Fatalf("Register failed: %v", err) + } + + metrics, err := reg.Gather() + if err != nil { + t.Fatalf("Gather failed: %v", err) + } + + for _, m := range metrics { + if m.GetName() != "watchdog_start_time_seconds" { + continue + } + val := m.GetMetric()[0].GetGauge().GetValue() + if val <= 0 { + t.Errorf("expected positive start time, got %v", val) + } + return + } + + t.Error("watchdog_start_time_seconds metric not found") +} diff --git a/main.go b/main.go index ce24c8f..1f5d21b 100644 --- a/main.go +++ b/main.go @@ -2,6 +2,19 @@ package main import "notashelf.dev/watchdog/cmd/watchdog" +// Injected at build time via ldflags: +// +// -X main.Version=v1.0.0 +// -X main.Commit=abc1234 +// -X main.BuildDate=2026-03-02 +// +// I hate this pattern btw. +var ( + Version = "dev" + Commit = "unknown" + BuildDate = "unknown" +) + func main() { - watchdog.Main() + watchdog.Main(Version, Commit, BuildDate) } From dc6b6e0c0c34ca3e10865f2625d6153ba53c4cc9 Mon Sep 17 00:00:00 2001 From: NotAShelf Date: Mon, 2 Mar 2026 22:08:34 +0300 Subject: [PATCH 06/10] nix: correct ldflags Signed-off-by: NotAShelf Change-Id: I5806b91c9dc1dfa9690a6e01cd29059b6a6a6964 --- nix/package.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nix/package.nix b/nix/package.nix index f5a987e..58d7757 100644 --- a/nix/package.nix +++ b/nix/package.nix @@ -22,7 +22,7 @@ buildGoModule (finalAttrs: { vendorHash = "sha256-jMqPVvMZDm406Gi2g4zNSRJMySLAN7/CR/2NgF+gqtA="; - ldflags = ["-s" "-w" "-X main.version=${finalAttrs.version}"]; + ldflags = ["-s" "-w" "-X main.Version=${finalAttrs.version}"]; # Copy web assets postInstall = '' From 531aafb09453e340e8df812e6db901cc692c015a Mon Sep 17 00:00:00 2001 From: NotAShelf Date: Mon, 2 Mar 2026 22:08:50 +0300 Subject: [PATCH 07/10] docs: document configuration behaviour; notes on environment vars Signed-off-by: NotAShelf Change-Id: I071c766ba98ed03e0b10928c25af0d0b6a6a6964 --- docs/configuration.md | 237 ++++++++++++++++++++++++++++++++++++++++++ 
1 file changed, 237 insertions(+) create mode 100644 docs/configuration.md diff --git a/docs/configuration.md b/docs/configuration.md new file mode 100644 index 0000000..2bdd734 --- /dev/null +++ b/docs/configuration.md @@ -0,0 +1,237 @@ +# Configuration + +Watchdog supports multiple configuration sources with the following precedence +(highest to lowest): + +1. **Command-line flags** +2. **Environment variables** +3. **Configuration file** +4. **Defaults** + +## Configuration File + +The primary configuration method is via YAML file. By default, Watchdog looks +for: + +- `./config.yaml` (current directory) +- `/etc/watchdog/config.yaml` (system-wide) + +Specify a custom location: + +```bash +# Provide your configuration YAML file with --config +$ watchdog --config /path/to/config.yaml +``` + +See [config.example.yaml](../config.example.yaml) for all available options. + +## Environment Variables + +All configuration options can be set via environment variables with the +`WATCHDOG_` prefix. + +Nested fields use underscore separators. 
For example: + +```bash +# site.domains +$ export WATCHDOG_SITE_DOMAINS="example.com,blog.example.com" + +# server.listen_addr +$ export WATCHDOG_SERVER_LISTEN_ADDR="127.0.0.1:8080" + +# site.collect.pageviews +$ export WATCHDOG_SITE_COLLECT_PAGEVIEWS=true + +# limits.max_paths +$ export WATCHDOG_LIMITS_MAX_PATHS=10000 +``` + +### Common Environment Variables + +```bash +# Server +WATCHDOG_SERVER_LISTEN_ADDR="127.0.0.1:8080" +WATCHDOG_SERVER_METRICS_PATH="/metrics" +WATCHDOG_SERVER_INGESTION_PATH="/api/event" +WATCHDOG_SERVER_STATE_PATH="/var/lib/watchdog/hll.state" + +# Site +WATCHDOG_SITE_DOMAINS="example.com" # comma-separated for multiple +WATCHDOG_SITE_SALT_ROTATION="daily" +WATCHDOG_SITE_SAMPLING=1.0 + +# Collection +WATCHDOG_SITE_COLLECT_PAGEVIEWS=true +WATCHDOG_SITE_COLLECT_COUNTRY=true +WATCHDOG_SITE_COLLECT_DEVICE=true +WATCHDOG_SITE_COLLECT_REFERRER="domain" +WATCHDOG_SITE_COLLECT_DOMAIN=false + +# Limits +WATCHDOG_LIMITS_MAX_PATHS=10000 +WATCHDOG_LIMITS_MAX_SOURCES=500 +WATCHDOG_LIMITS_MAX_CUSTOM_EVENTS=100 +WATCHDOG_LIMITS_MAX_EVENTS_PER_MINUTE=10000 + +# Security +WATCHDOG_SECURITY_CORS_ENABLED=false +WATCHDOG_SECURITY_METRICS_AUTH_ENABLED=false +WATCHDOG_SECURITY_METRICS_AUTH_USERNAME="admin" +WATCHDOG_SECURITY_METRICS_AUTH_PASSWORD="changeme" +``` + +## Command-Line Flags + +Command-line flags override both config file and environment variables: + +```bash +# Override server address +watchdog --listen-addr :9090 + +# Override metrics path +watchdog --metrics-path /prometheus/metrics + +# Override ingestion path +watchdog --ingestion-path /api/v1/event + +# Combine multiple overrides +watchdog --config prod.yaml --listen-addr :9090 --metrics-path /metrics +``` + +Available flags: + +- `--config string` - Path to config file +- `--listen-addr string` - Server listen address +- `--metrics-path string` - Metrics endpoint path +- `--ingestion-path string` - Ingestion endpoint path + +## Configuration Precedence Example + +Given: + +**config.yaml:** + 
+```yaml +server: + listen_addr: ":8080" + metrics_path: "/metrics" +``` + +**Environment:** + +```bash +export WATCHDOG_SERVER_LISTEN_ADDR=":9090" +``` + +**Command:** + +```bash +watchdog --metrics-path "/prometheus/metrics" +``` + +**Result:** + +- `listen_addr`: `:9090` (from environment variable) +- `metrics_path`: `/prometheus/metrics` (from CLI flag) + +## Systemd Integration + +Environment variables work seamlessly with systemd: + +```ini +[Service] +Environment="WATCHDOG_SERVER_LISTEN_ADDR=127.0.0.1:8080" +Environment="WATCHDOG_SITE_DOMAINS=example.com" +Environment="WATCHDOG_LIMITS_MAX_PATHS=10000" +ExecStart=/usr/local/bin/watchdog --config /etc/watchdog/config.yaml +``` + +Or use `EnvironmentFile`: + +```ini +[Service] +EnvironmentFile=/etc/watchdog/env +ExecStart=/usr/local/bin/watchdog +``` + +**/etc/watchdog/env:** + +```bash +WATCHDOG_SERVER_LISTEN_ADDR=127.0.0.1:8080 +WATCHDOG_SITE_DOMAINS=example.com +WATCHDOG_LIMITS_MAX_PATHS=10000 +``` + +## NixOS Integration + +NixOS configuration automatically converts to the correct format: + +```nix +{ + services.watchdog = { + enable = true; + settings = { + site.domains = [ "example.com" ]; + server.listen_addr = "127.0.0.1:8080"; + limits.max_paths = 10000; + }; + }; +} +``` + +This is equivalent to setting environment variables or using a config file. + +## Validation + +Configuration is validated on startup. Invalid values will cause Watchdog to +exit with an error: + +```bash +$ watchdog +Error: config validation failed: site.domains is required +``` + +Common validation errors: + +- `site.domains is required` - No domains configured +- `limits.max_paths must be greater than 0` - Invalid cardinality limit +- `site.collect.referrer must be 'off', 'domain', or 'url'` - Invalid referrer + mode +- `site.sampling must be between 0.0 and 1.0` - Invalid sampling rate + +## Best Practices + +1. **Use config file for base configuration** - Easier to version control and + review +2. 
**Use environment variables for secrets** - Don't commit passwords to config + files +3. **Use CLI flags for testing/overrides** - Quick temporary changes without + editing files + +Example hybrid approach: + +**config.yaml:** + +```yaml +site: + domains: + - example.com + collect: + pageviews: true + device: true + +limits: + max_paths: 10000 +``` + +**Environment (secrets):** + +```bash +export WATCHDOG_SECURITY_METRICS_AUTH_PASSWORD="$SECRET_PASSWORD" +``` + +**CLI (testing):** + +```bash +watchdog --listen-addr :9090 # Test on different port +``` From 3363e5c9232849624a57d1820070c43d9ea83891 Mon Sep 17 00:00:00 2001 From: NotAShelf Date: Mon, 2 Mar 2026 22:24:52 +0300 Subject: [PATCH 08/10] docs: include process metrics under available exports Signed-off-by: NotAShelf Change-Id: I0df00ecfddf98db1ebc85c2fc7758e326a6a6964 --- README.md | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index fae2a98..f64506a 100644 --- a/README.md +++ b/README.md @@ -75,7 +75,7 @@ installation mechanism. $ go build -o watchdog . 
# Run
-$ ./watchdog -config config.yaml
+$ ./watchdog --config config.yaml
 ```
 
 ## Configuration
@@ -207,11 +207,19 @@ While not final, some of the metrics collected are as follows:
 
 - `web_custom_events_total{event}` - Custom event counts
 - `web_daily_unique_visitors` - Estimated unique visitors (HyperLogLog)
 
-**Health metrics:**
+**Cardinality metrics:**
 
 - `web_path_overflow_total` - Paths rejected due to cardinality limit
 - `web_referrer_overflow_total` - Referrers rejected due to limit
 - `web_event_overflow_total` - Custom events rejected due to limit
+- `web_blocked_requests_total{reason}` - File server requests blocked by security filters
+
+**Process metrics:**
+
+- `watchdog_build_info{version,commit,build_date}` - Build metadata
+- `watchdog_start_time_seconds` - Unix timestamp of process start
+- `go_*` - Go runtime metrics (goroutines, GC, memory)
+- `process_*` - OS process metrics (CPU, RSS, file descriptors)
 
 ## Privacy
 
From 13343ef2bda5805731a27e0e988918ecfd621c9f Mon Sep 17 00:00:00 2001
From: NotAShelf
Date: Mon, 2 Mar 2026 22:25:13 +0300
Subject: [PATCH 09/10] nix: format Markdown with `deno fmt`

Signed-off-by: NotAShelf
Change-Id: Id652cb01903d1ca4de4b8839118fac556a6a6964
---
 flake.nix | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/flake.nix b/flake.nix
index bd38fad..15dc2d4 100644
--- a/flake.nix
+++ b/flake.nix
@@ -27,6 +27,7 @@
         pkgs.alejandra
         pkgs.fd
         pkgs.prettier
+        pkgs.deno
         pkgs.go # provides gofmt
         pkgs.golines
       ];
@@ -38,6 +39,9 @@
         # Format HTML & Javascript files with Prettier
         fd "$@" -t f -e html -e js -x prettier -w '{}'
 
+        # Format Markdown with Deno's Markdown formatter
+        fd "$@" -t f -e md -x deno fmt -q '{}'
+
         # Format go files with both gofmt & golines
         fd "$@" -t f -e go -x golines -l -w --max-len=110 \
           --base-formatter=gofmt \
From df06ed38bf540f5a626db04bc570050eac3286f6 Mon Sep 17 00:00:00 2001
From: NotAShelf
Date: Mon, 2 Mar 2026 22:35:32 +0300
Subject: [PATCH 10/10] docs: provide observability stack guide

Signed-off-by: 
NotAShelf Change-Id: Ibadc31d02413da836e85eaa3d446eb9e6a6a6964 --- README.md | 3 +- docs/observability.md | 300 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 302 insertions(+), 1 deletion(-) create mode 100644 docs/observability.md diff --git a/README.md b/README.md index f64506a..325dace 100644 --- a/README.md +++ b/README.md @@ -212,7 +212,8 @@ While not final, some of the metrics collected are as follows: - `web_path_overflow_total` - Paths rejected due to cardinality limit - `web_referrer_overflow_total` - Referrers rejected due to limit - `web_event_overflow_total` - Custom events rejected due to limit -- `web_blocked_requests_total{reason}` - File server requests blocked by security filters +- `web_blocked_requests_total{reason}` - File server requests blocked by + security filters **Process metrics:** diff --git a/docs/observability.md b/docs/observability.md new file mode 100644 index 0000000..1f3b9dd --- /dev/null +++ b/docs/observability.md @@ -0,0 +1,300 @@ +# Observability Setup + +Watchdog exposes Prometheus-formatted metrics at `/metrics`. You need a +time-series database to scrape and store these metrics, then visualize them in +Grafana. 
+ +> [!IMPORTANT] +> +> **Why you need Prometheus:** +> +> - Watchdog exposes _current state_ (counters, gauges) +> - Prometheus _scrapes periodically_ and _stores time-series data_ +> - Grafana _visualizes_ the historical data from Prometheus +> - Grafana cannot directly scrape Prometheus `/metrics` endpoints + +## Prometheus Setup + +### Configuring Prometheus + +Create `/etc/prometheus/prometheus.yml`: + +```yaml +global: + scrape_interval: 15s + evaluation_interval: 15s + +scrape_configs: + - job_name: "watchdog" + static_configs: + - targets: ["localhost:8080"] + + # Optional: scrape multiple Watchdog instances + # static_configs: + # - targets: + # - 'watchdog-1.example.com:8080' + # - 'watchdog-2.example.com:8080' + # labels: + # instance: 'production' + + # Scrape Prometheus itself + - job_name: "prometheus" + static_configs: + - targets: ["localhost:9090"] +``` + +### Verify Prometheus' health state + +```bash +# Check Prometheus is running +curl http://localhost:9090/-/healthy + +# Check it's scraping Watchdog +curl http://localhost:9090/api/v1/targets +``` + +### NixOS + +Add to your NixOS configuration: + +```nix +{ + services.prometheus = { + enable = true; + port = 9090; + + # Retention period + retentionTime = "30d"; + + scrapeConfigs = [ + { + job_name = "watchdog"; + static_configs = [{ + targets = [ "localhost:8080" ]; + }]; + } + ]; + }; + + # Open firewall if needed + # networking.firewall.allowedTCPPorts = [ 9090 ]; +} +``` + +For multiple Watchdog instances: + +```nix +{ + services.prometheus.scrapeConfigs = [ + { + job_name = "watchdog"; + static_configs = [ + { + labels.env = "production"; + targets = [ + "watchdog-1:8080" + "watchdog-2:8080" + "watchdog-3:8080" + ]; + } + ]; + } + ]; +} +``` + +## Grafana Setup + +### NixOS + +```nix +{ + services.grafana = { + enable = true; + settings = { + server = { + http_addr = "127.0.0.1"; + http_port = 3000; + }; + }; + + provision = { + enable = true; + + datasources.settings.datasources = [{ + 
name = "Prometheus"; + type = "prometheus"; + url = "http://localhost:9090"; + isDefault = true; + }]; + }; + }; +} +``` + +### Configure Data Source (Manual) + +If you're not using NixOS for provisioning, then you'll need to do provisioning +_imperatively_ from your Grafana configuration. Ths can be done through the +admin panel by navigating to `Configuration`, and choosing "add data source" +under `Data Sources`. Select your prometheus instance, and save it. + +### Import Pre-built Dashboard + +A sample Grafana dashboard is provided with support for multi-host and +multi-site configurations. Import it, configure the data source and it should +work out of the box. + +If you're not using NixOS for provisioning, the dashboard _also_ needs to be +provisioned manually. Under `Dashboards`, select `Import` and provide the JSON +contents or upload the sample dashboard from `contrib/grafana/watchdog.json`. +Select your Prometheus data source and import it. + +See [contrib/grafana/README.md](../contrib/grafana/README.md) for full +documentation. + +## Example Queries + +Once Prometheus is scraping Watchdog and Grafana is connected, you may write +your own widgets or create queries. Here are some example queries using +Prometheus query language, promql. Those are provided as examples and might not +provide everything you need. Nevertheless, use them to improve your setup at +your disposal. + +If you believe you have some valuable widgets that you'd like to contribute +back, feel free! 
+ +### Top 10 Pages by Traffic + +```promql +topk(10, sum by (path) (rate(web_pageviews_total[5m]))) +``` + +### Mobile vs Desktop Split + +```promql +sum by (device) (rate(web_pageviews_total[1h])) +``` + +### Unique Visitors + +```promql +web_daily_unique_visitors +``` + +### Top Referrers + +```promql +topk(10, sum by (referrer) (rate(web_pageviews_total{referrer!="direct"}[1d]))) +``` + +### Multi-Site: Traffic per Domain + +```promql +sum by (domain) (rate(web_pageviews_total[1h])) +``` + +### Cardinality Health + +```promql +# Should be near zero +rate(web_path_overflow_total[5m]) +rate(web_referrer_overflow_total[5m]) +rate(web_event_overflow_total[5m]) +``` + +## Horizontal Scaling Considerations + +When running multiple Watchdog instances: + +1. **Each instance exposes its own metrics** - Prometheus scrapes all instances +2. **Prometheus aggregates automatically** - use `sum()` in queries to aggregate + across instances +3. **No shared state needed** - each Watchdog instance is independent + +Watchdog is almost entirely stateless, so horizontal scaling should be trivial +as long as you have the necessary infrastructure and, well, the patience. 
+Example with 3 instances: + +```promql +# Total pageviews across all instances +sum(rate(web_pageviews_total[5m])) + +# Per-instance breakdown +sum by (instance) (rate(web_pageviews_total[5m])) +``` + +## Alternatives to Prometheus + +### VictoriaMetrics + +Drop-in Prometheus replacement with better performance and compression: + +```nix +{ + services.victoriametrics = { + enable = true; + listenAddress = ":8428"; + retentionPeriod = "12month"; + }; + + # Configure Prometheus to remote-write to VictoriaMetrics + services.prometheus = { + enable = true; + remoteWrite = [{ + url = "http://localhost:8428/api/v1/write"; + }]; + }; +} +``` + +### Grafana Agent + +Lightweight alternative that scrapes and forwards to Grafana Cloud or local +Prometheus: + +```bash +# Systemd setup for Grafana Agent +sudo systemctl enable --now grafana-agent +``` + +```yaml +# /etc/grafana-agent.yaml +metrics: + wal_directory: /var/lib/grafana-agent + configs: + - name: watchdog + scrape_configs: + - job_name: watchdog + static_configs: + - targets: ["localhost:8080"] + remote_write: + - url: http://localhost:9090/api/v1/write +``` + +## Monitoring the Monitoring + +Monitor Prometheus itself: + +```promql +# Prometheus scrape success rate +up{job="watchdog"} + +# Scrape duration +scrape_duration_seconds{job="watchdog"} + +# Time since last scrape +time() - timestamp(up{job="watchdog"}) +``` + +## Additional Recommendations + +1. **Retention**: Set `--storage.tsdb.retention.time=30d` or longer based on + disk space +2. **Backups**: Back up `/var/lib/prometheus` periodically (or whatever your + state directory is) +3. **Alerting**: Configure Prometheus alerting rules for critical metrics +4. **High Availability**: Run multiple Prometheus instances with identical + configs +5. **Remote Storage**: For long-term storage, use Thanos, Cortex, or + VictoriaMetrics