internal/aggregate: implement hourly salt rotation for unique visitors

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: I5861c5bb55153349d0710cc07c1595a96a6a6964
This commit is contained in:
raf 2026-03-07 08:28:03 +03:00
commit d975c7b2d1
Signed by: NotAShelf
GPG key ID: 29D95B64378DB4BF
3 changed files with 41 additions and 34 deletions

View file

@ -109,7 +109,7 @@ func NewMetricsAggregator(
refOverflow: refOverflow, refOverflow: refOverflow,
eventOverflow: eventOverflow, eventOverflow: eventOverflow,
dailyUniques: dailyUniques, dailyUniques: dailyUniques,
estimator: NewUniquesEstimator(), estimator: NewUniquesEstimator(cfg.Site.SaltRotation),
stopChan: make(chan struct{}), stopChan: make(chan struct{}),
} }

View file

@ -12,37 +12,39 @@ import (
"github.com/axiomhq/hyperloglog" "github.com/axiomhq/hyperloglog"
) )
// Tracks unique visitors using HyperLogLog with daily salt rotation // Tracks unique visitors using HyperLogLog with configurable salt rotation
type UniquesEstimator struct { type UniquesEstimator struct {
hll *hyperloglog.Sketch hll *hyperloglog.Sketch
currentDay string salt string
mu sync.Mutex rotation string // "daily" or "hourly"
mu sync.Mutex
} }
// Creates a new unique visitor estimator // Creates a new unique visitor estimator
func NewUniquesEstimator() *UniquesEstimator { func NewUniquesEstimator(rotation string) *UniquesEstimator {
return &UniquesEstimator{ return &UniquesEstimator{
hll: hyperloglog.New(), hll: hyperloglog.New(),
currentDay: dailySalt(time.Now()), salt: generateSalt(time.Now(), rotation),
rotation: rotation,
} }
} }
// Add records a visitor with privacy-preserving hashing // Add records a visitor with privacy-preserving hashing
// Uses IP + UserAgent + daily salt to prevent cross-day correlation // Uses IP + UserAgent + salt to prevent cross-period correlation
func (u *UniquesEstimator) Add(ip, userAgent string) { func (u *UniquesEstimator) Add(ip, userAgent string) {
u.mu.Lock() u.mu.Lock()
defer u.mu.Unlock() defer u.mu.Unlock()
// Check if we need to rotate to a new day // Check if we need to rotate to a new period
today := dailySalt(time.Now()) currentSalt := generateSalt(time.Now(), u.rotation)
if today != u.currentDay { if currentSalt != u.salt {
// Reset HLL for new day // Reset HLL for new period
u.hll = hyperloglog.New() u.hll = hyperloglog.New()
u.currentDay = today u.salt = currentSalt
} }
// Hash visitor with daily salt to prevent cross-day tracking // Hash visitor with salt to prevent cross-period tracking
hash := hashVisitor(ip, userAgent, u.currentDay) hash := hashVisitor(ip, userAgent, u.salt)
u.hll.Insert([]byte(hash)) u.hll.Insert([]byte(hash))
} }
@ -53,12 +55,17 @@ func (u *UniquesEstimator) Estimate() uint64 {
return u.hll.Estimate() return u.hll.Estimate()
} }
// Generates a deterministic salt based on the current date // Generates a deterministic salt based on the rotation mode
// Same day = same salt, different day = different salt // Daily: same day = same salt, different day = different salt
func dailySalt(t time.Time) string { // Hourly: same hour = same salt, different hour = different salt
// Use UTC to ensure consistent rotation regardless of timezone func generateSalt(t time.Time, rotation string) string {
date := t.UTC().Format("2006-01-02") var key string
h := sha256.Sum256([]byte("watchdog-salt-" + date)) if rotation == "hourly" {
key = t.UTC().Format("2006-01-02T15")
} else {
key = t.UTC().Format("2006-01-02")
}
h := sha256.Sum256([]byte("watchdog-salt-" + key))
return hex.EncodeToString(h[:]) return hex.EncodeToString(h[:])
} }
@ -73,12 +80,12 @@ func hashVisitor(ip, userAgent, salt string) string {
func (u *UniquesEstimator) CurrentSalt() string { func (u *UniquesEstimator) CurrentSalt() string {
u.mu.Lock() u.mu.Lock()
defer u.mu.Unlock() defer u.mu.Unlock()
return u.currentDay return u.salt
} }
// Exported for testing // Exported for testing
func DailySalt(t time.Time) string { func DailySalt(t time.Time) string {
return dailySalt(t) return generateSalt(t, "daily")
} }
// Save persists the HLL state to disk // Save persists the HLL state to disk
@ -91,8 +98,8 @@ func (u *UniquesEstimator) Save(path string) error {
return err return err
} }
// Save both HLL data and current day salt // Save both HLL data and current salt
return os.WriteFile(path, append([]byte(u.currentDay+"\n"), data...), 0600) return os.WriteFile(path, append([]byte(u.salt+"\n"), data...), 0600)
} }
// Load restores the HLL state from disk // Load restores the HLL state from disk
@ -115,16 +122,16 @@ func (u *UniquesEstimator) Load(path string) error {
} }
savedSalt := string(parts[0]) savedSalt := string(parts[0])
today := dailySalt(time.Now()) currentSalt := generateSalt(time.Now(), u.rotation)
// Only restore if it's the same day // Only restore if it's the same period
if savedSalt == today { if savedSalt == currentSalt {
u.currentDay = savedSalt u.salt = savedSalt
return u.hll.UnmarshalBinary(parts[1]) return u.hll.UnmarshalBinary(parts[1])
} }
// Different day, start fresh // Different period, start fresh
u.hll = hyperloglog.New() u.hll = hyperloglog.New()
u.currentDay = today u.salt = currentSalt
return nil return nil
} }

View file

@ -33,7 +33,7 @@ func TestDailySalt(t *testing.T) {
} }
func TestUniquesEstimator(t *testing.T) { func TestUniquesEstimator(t *testing.T) {
estimator := NewUniquesEstimator() estimator := NewUniquesEstimator("daily")
// Initially should be zero // Initially should be zero
if count := estimator.Estimate(); count != 0 { if count := estimator.Estimate(); count != 0 {
@ -87,7 +87,7 @@ func TestUniquesEstimatorDailyRotation(t *testing.T) {
} }
// Verify estimator uses current day's salt // Verify estimator uses current day's salt
estimator := NewUniquesEstimator() estimator := NewUniquesEstimator("daily")
currentSalt := estimator.CurrentSalt() currentSalt := estimator.CurrentSalt()
expectedSalt := DailySalt(time.Now()) expectedSalt := DailySalt(time.Now())