internal/aggregate: add HyperLogLog unique visitor tracking
Extracts IP from X-Forwarded-For/X-Real-IP/RemoteAddr. Only active when `config.Site.SaltRotation` is set. Signed-off-by: NotAShelf <raf@notashelf.dev> Change-Id: Ieef93b81e9894fc2e9e129451bf2dfdf6a6a6964
This commit is contained in:
parent
b6f2380a20
commit
993e47e603
6 changed files with 269 additions and 0 deletions
3
go.mod
3
go.mod
|
|
@ -7,8 +7,11 @@ require gopkg.in/yaml.v3 v3.0.1
|
||||||
require golang.org/x/net v0.51.0
|
require golang.org/x/net v0.51.0
|
||||||
|
|
||||||
require (
|
require (
|
||||||
|
github.com/axiomhq/hyperloglog v0.2.6 // indirect
|
||||||
github.com/beorn7/perks v1.0.1 // indirect
|
github.com/beorn7/perks v1.0.1 // indirect
|
||||||
github.com/cespare/xxhash/v2 v2.3.0 // indirect
|
github.com/cespare/xxhash/v2 v2.3.0 // indirect
|
||||||
|
github.com/dgryski/go-metro v0.0.0-20250106013310-edb8663e5e33 // indirect
|
||||||
|
github.com/kamstrup/intmap v0.5.2 // indirect
|
||||||
github.com/kylelemons/godebug v1.1.0 // indirect
|
github.com/kylelemons/godebug v1.1.0 // indirect
|
||||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
|
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
|
||||||
github.com/prometheus/client_golang v1.23.2 // indirect
|
github.com/prometheus/client_golang v1.23.2 // indirect
|
||||||
|
|
|
||||||
6
go.sum
6
go.sum
|
|
@ -1,7 +1,13 @@
|
||||||
|
github.com/axiomhq/hyperloglog v0.2.6 h1:sRhvvF3RIXWQgAXaTphLp4yJiX4S0IN3MWTaAgZoRJw=
|
||||||
|
github.com/axiomhq/hyperloglog v0.2.6/go.mod h1:YjX/dQqCR/7QYX0g8mu8UZAjpIenz1FKM71UEsjFoTo=
|
||||||
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
|
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
|
||||||
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
|
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
|
||||||
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
|
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
|
||||||
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||||
|
github.com/dgryski/go-metro v0.0.0-20250106013310-edb8663e5e33 h1:ucRHb6/lvW/+mTEIGbvhcYU3S8+uSNkuMjx/qZFfhtM=
|
||||||
|
github.com/dgryski/go-metro v0.0.0-20250106013310-edb8663e5e33/go.mod h1:c9O8+fpSOX1DM8cPNSkX/qsBWdkD4yd2dpciOWQjpBw=
|
||||||
|
github.com/kamstrup/intmap v0.5.2 h1:qnwBm1mh4XAnW9W9Ue9tZtTff8pS6+s6iKF6JRIV2Dk=
|
||||||
|
github.com/kamstrup/intmap v0.5.2/go.mod h1:gWUVWHKzWj8xpJVFf5GC0O26bWmv3GqdnIX/LMT6Aq4=
|
||||||
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
|
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
|
||||||
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
|
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
|
||||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
|
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,8 @@ type MetricsAggregator struct {
|
||||||
pageviews *prometheus.CounterVec
|
pageviews *prometheus.CounterVec
|
||||||
customEvents *prometheus.CounterVec
|
customEvents *prometheus.CounterVec
|
||||||
pathOverflow prometheus.Counter
|
pathOverflow prometheus.Counter
|
||||||
|
dailyUniques prometheus.Gauge
|
||||||
|
estimator *UniquesEstimator
|
||||||
}
|
}
|
||||||
|
|
||||||
// Creates a new metrics aggregator with dynamic labels based on config
|
// Creates a new metrics aggregator with dynamic labels based on config
|
||||||
|
|
@ -54,12 +56,21 @@ func NewMetricsAggregator(registry *PathRegistry, cfg config.Config) *MetricsAgg
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
dailyUniques := prometheus.NewGauge(
|
||||||
|
prometheus.GaugeOpts{
|
||||||
|
Name: "web_daily_unique_visitors",
|
||||||
|
Help: "Estimated unique visitors today (HyperLogLog)",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
return &MetricsAggregator{
|
return &MetricsAggregator{
|
||||||
registry: registry,
|
registry: registry,
|
||||||
cfg: cfg,
|
cfg: cfg,
|
||||||
pageviews: pageviews,
|
pageviews: pageviews,
|
||||||
customEvents: customEvents,
|
customEvents: customEvents,
|
||||||
pathOverflow: pathOverflow,
|
pathOverflow: pathOverflow,
|
||||||
|
dailyUniques: dailyUniques,
|
||||||
|
estimator: NewUniquesEstimator(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -93,9 +104,20 @@ func (m *MetricsAggregator) RecordPathOverflow() {
|
||||||
m.pathOverflow.Inc()
|
m.pathOverflow.Inc()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Adds a visitor to the unique visitor estimator. Only tracks if salt_rotation is configured
|
||||||
|
func (m *MetricsAggregator) AddUnique(ip, userAgent string) {
|
||||||
|
if m.cfg.Site.SaltRotation == "" {
|
||||||
|
return // only track if salt rotation is enabled
|
||||||
|
}
|
||||||
|
|
||||||
|
m.estimator.Add(ip, userAgent)
|
||||||
|
m.dailyUniques.Set(float64(m.estimator.Estimate()))
|
||||||
|
}
|
||||||
|
|
||||||
// Registers all metrics with the provided Prometheus registry
|
// Registers all metrics with the provided Prometheus registry
|
||||||
func (m *MetricsAggregator) MustRegister(reg prometheus.Registerer) {
|
func (m *MetricsAggregator) MustRegister(reg prometheus.Registerer) {
|
||||||
reg.MustRegister(m.pageviews)
|
reg.MustRegister(m.pageviews)
|
||||||
reg.MustRegister(m.customEvents)
|
reg.MustRegister(m.customEvents)
|
||||||
reg.MustRegister(m.pathOverflow)
|
reg.MustRegister(m.pathOverflow)
|
||||||
|
reg.MustRegister(m.dailyUniques)
|
||||||
}
|
}
|
||||||
|
|
|
||||||
79
internal/aggregate/uniques.go
Normal file
79
internal/aggregate/uniques.go
Normal file
|
|
@ -0,0 +1,79 @@
|
||||||
|
package aggregate
|
||||||
|
|
||||||
|
import (
|
||||||
|
"crypto/sha256"
|
||||||
|
"encoding/hex"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/axiomhq/hyperloglog"
|
||||||
|
)
|
||||||
|
|
||||||
|
// UniquesEstimator tracks unique visitors using HyperLogLog with daily salt rotation
|
||||||
|
type UniquesEstimator struct {
|
||||||
|
hll *hyperloglog.Sketch
|
||||||
|
currentDay string
|
||||||
|
mu sync.Mutex
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewUniquesEstimator creates a new unique visitor estimator
|
||||||
|
func NewUniquesEstimator() *UniquesEstimator {
|
||||||
|
return &UniquesEstimator{
|
||||||
|
hll: hyperloglog.New(),
|
||||||
|
currentDay: dailySalt(time.Now()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add records a visitor with privacy-preserving hashing
|
||||||
|
// Uses IP + UserAgent + daily salt to prevent cross-day correlation
|
||||||
|
func (u *UniquesEstimator) Add(ip, userAgent string) {
|
||||||
|
u.mu.Lock()
|
||||||
|
defer u.mu.Unlock()
|
||||||
|
|
||||||
|
// Check if we need to rotate to a new day
|
||||||
|
today := dailySalt(time.Now())
|
||||||
|
if today != u.currentDay {
|
||||||
|
// Reset HLL for new day
|
||||||
|
u.hll = hyperloglog.New()
|
||||||
|
u.currentDay = today
|
||||||
|
}
|
||||||
|
|
||||||
|
// Hash visitor with daily salt to prevent cross-day tracking
|
||||||
|
hash := hashVisitor(ip, userAgent, u.currentDay)
|
||||||
|
u.hll.Insert([]byte(hash))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Estimate returns the estimated number of unique visitors
|
||||||
|
func (u *UniquesEstimator) Estimate() uint64 {
|
||||||
|
u.mu.Lock()
|
||||||
|
defer u.mu.Unlock()
|
||||||
|
return u.hll.Estimate()
|
||||||
|
}
|
||||||
|
|
||||||
|
// dailySalt generates a deterministic salt based on the current date
|
||||||
|
// Same day = same salt, different day = different salt
|
||||||
|
func dailySalt(t time.Time) string {
|
||||||
|
// Use UTC to ensure consistent rotation regardless of timezone
|
||||||
|
date := t.UTC().Format("2006-01-02")
|
||||||
|
h := sha256.Sum256([]byte("watchdog-salt-" + date))
|
||||||
|
return hex.EncodeToString(h[:])
|
||||||
|
}
|
||||||
|
|
||||||
|
// hashVisitor creates a privacy-preserving hash of visitor identity
|
||||||
|
func hashVisitor(ip, userAgent, salt string) string {
|
||||||
|
combined := ip + "|" + userAgent + "|" + salt
|
||||||
|
h := sha256.Sum256([]byte(combined))
|
||||||
|
return hex.EncodeToString(h[:])
|
||||||
|
}
|
||||||
|
|
||||||
|
// CurrentSalt returns the current salt for testing
|
||||||
|
func (u *UniquesEstimator) CurrentSalt() string {
|
||||||
|
u.mu.Lock()
|
||||||
|
defer u.mu.Unlock()
|
||||||
|
return u.currentDay
|
||||||
|
}
|
||||||
|
|
||||||
|
// DailySalt is exported for testing
|
||||||
|
func DailySalt(t time.Time) string {
|
||||||
|
return dailySalt(t)
|
||||||
|
}
|
||||||
125
internal/aggregate/uniques_test.go
Normal file
125
internal/aggregate/uniques_test.go
Normal file
|
|
@ -0,0 +1,125 @@
|
||||||
|
package aggregate
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestDailySalt(t *testing.T) {
|
||||||
|
// Same day should produce same salt
|
||||||
|
day1Morning := time.Date(2024, 1, 15, 9, 0, 0, 0, time.UTC)
|
||||||
|
day1Evening := time.Date(2024, 1, 15, 23, 59, 59, 0, time.UTC)
|
||||||
|
|
||||||
|
salt1 := DailySalt(day1Morning)
|
||||||
|
salt2 := DailySalt(day1Evening)
|
||||||
|
|
||||||
|
if salt1 != salt2 {
|
||||||
|
t.Errorf("Expected same salt for same day, got %s and %s", salt1, salt2)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Different day should produce different salt
|
||||||
|
day2 := time.Date(2024, 1, 16, 9, 0, 0, 0, time.UTC)
|
||||||
|
salt3 := DailySalt(day2)
|
||||||
|
|
||||||
|
if salt1 == salt3 {
|
||||||
|
t.Errorf("Expected different salt for different days, got same: %s", salt1)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Salt should be deterministic
|
||||||
|
salt4 := DailySalt(day1Morning)
|
||||||
|
if salt1 != salt4 {
|
||||||
|
t.Errorf("Salt should be deterministic, got %s and %s", salt1, salt4)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestUniquesEstimator(t *testing.T) {
|
||||||
|
estimator := NewUniquesEstimator()
|
||||||
|
|
||||||
|
// Initially should be zero
|
||||||
|
if count := estimator.Estimate(); count != 0 {
|
||||||
|
t.Errorf("Expected initial count of 0, got %d", count)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add a visitor
|
||||||
|
estimator.Add("192.168.1.1", "Mozilla/5.0")
|
||||||
|
if count := estimator.Estimate(); count != 1 {
|
||||||
|
t.Errorf("Expected count of 1 after adding visitor, got %d", count)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Adding same visitor should not increase count
|
||||||
|
estimator.Add("192.168.1.1", "Mozilla/5.0")
|
||||||
|
if count := estimator.Estimate(); count != 1 {
|
||||||
|
t.Errorf("Expected count of 1 after adding duplicate, got %d", count)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Adding different visitor should increase count
|
||||||
|
estimator.Add("192.168.1.2", "Chrome/90.0")
|
||||||
|
if count := estimator.Estimate(); count < 2 {
|
||||||
|
t.Errorf("Expected count of at least 2 after adding different visitor, got %d", count)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test with many unique visitors
|
||||||
|
for i := range 1000 {
|
||||||
|
ip := "10.0." + string(rune(i/256)) + "." + string(rune(i%256))
|
||||||
|
estimator.Add(ip, "TestAgent")
|
||||||
|
}
|
||||||
|
|
||||||
|
count := estimator.Estimate()
|
||||||
|
// HyperLogLog has ~2% error rate, so 1000 uniques should be 980-1020
|
||||||
|
if count < 980 || count > 1020 {
|
||||||
|
t.Errorf("Expected count around 1000, got %d (error rate too high)", count)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestUniquesEstimatorDailyRotation(t *testing.T) {
|
||||||
|
// This test verifies that salts are different on different days
|
||||||
|
// The actual rotation happens in Add() based on current time
|
||||||
|
|
||||||
|
// Verify that different days produce different salts
|
||||||
|
day1 := time.Date(2024, 1, 15, 12, 0, 0, 0, time.UTC)
|
||||||
|
day2 := time.Date(2024, 1, 16, 12, 0, 0, 0, time.UTC)
|
||||||
|
|
||||||
|
salt1 := DailySalt(day1)
|
||||||
|
salt2 := DailySalt(day2)
|
||||||
|
|
||||||
|
if salt1 == salt2 {
|
||||||
|
t.Error("Expected different salts for different days")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify estimator uses current day's salt
|
||||||
|
estimator := NewUniquesEstimator()
|
||||||
|
currentSalt := estimator.CurrentSalt()
|
||||||
|
expectedSalt := DailySalt(time.Now())
|
||||||
|
|
||||||
|
if currentSalt != expectedSalt {
|
||||||
|
t.Errorf("Expected estimator to use current day's salt, got %s, expected %s", currentSalt, expectedSalt)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestHashVisitor(t *testing.T) {
|
||||||
|
// Same inputs should produce same hash
|
||||||
|
hash1 := hashVisitor("192.168.1.1", "Mozilla/5.0", "salt123")
|
||||||
|
hash2 := hashVisitor("192.168.1.1", "Mozilla/5.0", "salt123")
|
||||||
|
|
||||||
|
if hash1 != hash2 {
|
||||||
|
t.Error("Expected same hash for same inputs")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Different IP should produce different hash
|
||||||
|
hash3 := hashVisitor("192.168.1.2", "Mozilla/5.0", "salt123")
|
||||||
|
if hash1 == hash3 {
|
||||||
|
t.Error("Expected different hash for different IP")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Different user agent should produce different hash
|
||||||
|
hash4 := hashVisitor("192.168.1.1", "Chrome/90.0", "salt123")
|
||||||
|
if hash1 == hash4 {
|
||||||
|
t.Error("Expected different hash for different user agent")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Different salt should produce different hash
|
||||||
|
hash5 := hashVisitor("192.168.1.1", "Mozilla/5.0", "salt456")
|
||||||
|
if hash1 == hash5 {
|
||||||
|
t.Error("Expected different hash for different salt")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -3,6 +3,7 @@ package api
|
||||||
import (
|
import (
|
||||||
"log"
|
"log"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"strings"
|
||||||
|
|
||||||
"notashelf.dev/watchdog/internal/aggregate"
|
"notashelf.dev/watchdog/internal/aggregate"
|
||||||
"notashelf.dev/watchdog/internal/config"
|
"notashelf.dev/watchdog/internal/config"
|
||||||
|
|
@ -70,6 +71,13 @@ func (h *IngestionHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Extract visitor identity for unique tracking
|
||||||
|
ip := extractIP(r)
|
||||||
|
userAgent := r.Header.Get("User-Agent")
|
||||||
|
|
||||||
|
// Track unique visitor if salt rotation is enabled
|
||||||
|
h.metricsAgg.AddUnique(ip, userAgent)
|
||||||
|
|
||||||
// Process based on event type
|
// Process based on event type
|
||||||
if event.Event != "" {
|
if event.Event != "" {
|
||||||
// Custom event
|
// Custom event
|
||||||
|
|
@ -102,6 +110,32 @@ func (h *IngestionHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||||
w.WriteHeader(http.StatusNoContent)
|
w.WriteHeader(http.StatusNoContent)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// extractIP extracts the client IP from the request
|
||||||
|
// Checks X-Forwarded-For and X-Real-IP headers for proxied requests
|
||||||
|
func extractIP(r *http.Request) string {
|
||||||
|
// Check X-Forwarded-For header (may contain multiple IPs)
|
||||||
|
if xff := r.Header.Get("X-Forwarded-For"); xff != "" {
|
||||||
|
// Take the first IP in the list
|
||||||
|
ips := strings.Split(xff, ",")
|
||||||
|
if len(ips) > 0 {
|
||||||
|
return strings.TrimSpace(ips[0])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check X-Real-IP header
|
||||||
|
if xri := r.Header.Get("X-Real-IP"); xri != "" {
|
||||||
|
return xri
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fall back to RemoteAddr
|
||||||
|
ip := r.RemoteAddr
|
||||||
|
// Strip port if present
|
||||||
|
if idx := strings.LastIndex(ip, ":"); idx != -1 {
|
||||||
|
ip = ip[:idx]
|
||||||
|
}
|
||||||
|
return ip
|
||||||
|
}
|
||||||
|
|
||||||
// Classifies screen width into device categories
|
// Classifies screen width into device categories
|
||||||
func classifyDevice(width int) string {
|
func classifyDevice(width int) string {
|
||||||
// FIXME: probably not the best logic for this...
|
// FIXME: probably not the best logic for this...
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue