internal/aggregate: optimize HyperLogLog to prevent O(16384) operations

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: Ibc7e6d7a86e8679e299c46debee9683f6a6a6964
This commit is contained in:
raf 2026-03-01 13:10:52 +03:00
commit 8392992b41
Signed by: NotAShelf
GPG key ID: 29D95B64378DB4BF
2 changed files with 166 additions and 24 deletions

View file

@ -1,8 +1,11 @@
package aggregate
import (
"bytes"
"crypto/sha256"
"encoding/hex"
"fmt"
"os"
"sync"
"time"
@ -77,3 +80,48 @@ func (u *UniquesEstimator) CurrentSalt() string {
func DailySalt(t time.Time) string {
	// Exported entry point: hand the timestamp to the package-private
	// dailySalt and return its result unchanged.
	salt := dailySalt(t)
	return salt
}
// Save persists the HLL state to disk.
//
// The file holds the current day salt on the first line, followed by the
// bytes produced by the sketch's MarshalBinary. The content is first written
// to a sibling temporary file (path + ".tmp") and then renamed over path, so
// a failed or interrupted write does not leave a truncated state file at path
// for a later Load to read.
//
// The estimator's mutex is held for the whole call, which also keeps
// concurrent Save calls on the same estimator from interleaving on the
// temporary file.
//
// Errors from the file system are returned unwrapped (they already carry the
// operation and path); a marshaling failure is wrapped with context.
func (u *UniquesEstimator) Save(path string) error {
	u.mu.Lock()
	defer u.mu.Unlock()

	data, err := u.hll.MarshalBinary()
	if err != nil {
		return fmt.Errorf("marshaling HLL state: %w", err)
	}

	// Layout: "<currentDay>\n<binary sketch>". Sized up front so the three
	// appends below never reallocate.
	buf := make([]byte, 0, len(u.currentDay)+1+len(data))
	buf = append(buf, u.currentDay...)
	buf = append(buf, '\n')
	buf = append(buf, data...)

	tmp := path + ".tmp"
	if err := os.WriteFile(tmp, buf, 0600); err != nil {
		return err
	}
	if err := os.Rename(tmp, path); err != nil {
		// Best-effort cleanup; the rename failure is the error the caller
		// needs to see, so a failure to remove the temp file is ignored.
		_ = os.Remove(tmp)
		return err
	}
	return nil
}
// Load restores the HLL state from disk.
//
// The file is expected in the layout written by Save: the day salt on the
// first line, then the binary sketch. The saved sketch is restored only when
// the saved salt equals today's salt; otherwise the estimator starts over
// with an empty sketch under today's salt.
//
// A read failure is returned unwrapped, so a caller that treats a missing
// file as a normal first run can detect it with os.IsNotExist. A file without
// a newline separator, or a payload the sketch cannot decode, is reported as
// an error. In every error case the estimator's existing state is left
// untouched: the payload is decoded into a fresh sketch that replaces the
// current one only after decoding succeeds.
func (u *UniquesEstimator) Load(path string) error {
	data, err := os.ReadFile(path)
	if err != nil {
		return err
	}

	// Split once at the first newline: salt before it, sketch bytes after.
	sep := bytes.IndexByte(data, '\n')
	if sep < 0 {
		return fmt.Errorf("invalid state file format")
	}
	savedSalt := string(data[:sep])
	payload := data[sep+1:]

	u.mu.Lock()
	defer u.mu.Unlock()

	today := dailySalt(time.Now())
	fresh := hyperloglog.New()

	// Only restore if the state was saved under today's salt; a different
	// day keeps the empty sketch.
	if savedSalt == today {
		if err := fresh.UnmarshalBinary(payload); err != nil {
			return fmt.Errorf("decoding saved HLL state: %w", err)
		}
	}

	u.hll = fresh
	u.currentDay = today
	return nil
}