From ce848ed6f0aa2825917d116df2da2b09f06616a4 Mon Sep 17 00:00:00 2001 From: NotAShelf Date: Sun, 1 Mar 2026 04:42:59 +0300 Subject: [PATCH] internal: add bounded path registry to prevent cardinality explosion "cardinality explosion" would make for an epic rock band name... Signed-off-by: NotAShelf Change-Id: I53cceb00ab9b17039b1fb1389977bf6b6a6a6964 --- internal/aggregate/registry.go | 72 +++++++++++++++++++++ internal/aggregate/registry_test.go | 98 +++++++++++++++++++++++++++++ testdata/config.valid.yaml | 1 + 3 files changed, 171 insertions(+) create mode 100644 internal/aggregate/registry.go create mode 100644 internal/aggregate/registry_test.go diff --git a/internal/aggregate/registry.go b/internal/aggregate/registry.go new file mode 100644 index 0000000..7a47314 --- /dev/null +++ b/internal/aggregate/registry.go @@ -0,0 +1,72 @@ +package aggregate + +import ( + "sync" +) + +// PathRegistry maintains a bounded, mutex-guarded set of unique request paths. It prevents metric cardinality explosion by rejecting new paths +// once the configured limit (maxPaths) is reached; every rejected Add call is tallied in overflowCount. +type PathRegistry struct { + mu sync.RWMutex + paths map[string]struct{} + maxPaths int + overflowCount int +} + +// NewPathRegistry creates a PathRegistry that accepts at most maxPaths unique paths; maxPaths is also used as the size hint for the backing map. +// Once this limit is reached, subsequent Add() calls for new paths will be rejected. NOTE(review): with a zero or negative maxPaths no path is ever accepted - confirm callers validate the limit. +func NewPathRegistry(maxPaths int) *PathRegistry { + return &PathRegistry{ + paths: make(map[string]struct{}, maxPaths), + maxPaths: maxPaths, + } +} + +// Add attempts to add a path to the registry. +// It returns true if the path was accepted (it already existed or was added), +// and false if it was rejected because the limit was reached; each rejection increments the overflow counter. 
+func (r *PathRegistry) Add(path string) bool { + r.mu.Lock() + defer r.mu.Unlock() + + // A path that is already registered is accepted without using up capacity + if _, exists := r.paths[path]; exists { + return true + } + + // Below the limit: register the new path + if len(r.paths) < r.maxPaths { + r.paths[path] = struct{}{} + return true + } + + // Limit reached - reject the path and count the rejection + r.overflowCount++ + return false +} + +// Contains reports whether path is currently in the registry. It only takes the read lock and never modifies the registry. +func (r *PathRegistry) Contains(path string) bool { + r.mu.RLock() + defer r.mu.RUnlock() + + _, exists := r.paths[path] + return exists +} + +// Count returns the number of unique paths currently stored in the registry (rejected paths are not included). +func (r *PathRegistry) Count() int { + r.mu.RLock() + defer r.mu.RUnlock() + + return len(r.paths) +} + +// OverflowCount returns the number of Add calls that were rejected +// due to the registry being at capacity; repeated rejections of the same path are each counted. +func (r *PathRegistry) OverflowCount() int { + r.mu.RLock() + defer r.mu.RUnlock() + + return r.overflowCount +} diff --git a/internal/aggregate/registry_test.go b/internal/aggregate/registry_test.go new file mode 100644 index 0000000..7a1f9cc --- /dev/null +++ b/internal/aggregate/registry_test.go @@ -0,0 +1,98 @@ +package aggregate + +import ( + "testing" +) + +func TestPathRegistry_Add(t *testing.T) { + registry := NewPathRegistry(3) + + // Add paths within limit + if !registry.Add("/api/users") { + t.Error("Expected first path to be accepted") + } + if !registry.Add("/api/posts") { + t.Error("Expected second path to be accepted") + } + if !registry.Add("/api/comments") { + t.Error("Expected third path to be accepted") + } + + // Add duplicate path - should succeed + if !registry.Add("/api/users") { + t.Error("Expected duplicate path to be accepted") + } + + // Verify count is still 3 + if count := registry.Count(); count != 3 { + t.Errorf("Expected count 3, got %d", count) + } + + // Exceed limit + if registry.Add("/api/photos") { + t.Error("Expected fourth unique path to be rejected") + } + + // Verify 
overflow count + if overflow := registry.OverflowCount(); overflow != 1 { + t.Errorf("Expected overflow count 1, got %d", overflow) + } + + // Add another path beyond limit + if registry.Add("/api/videos") { + t.Error("Expected fifth unique path to be rejected") + } + + // Verify overflow count incremented + if overflow := registry.OverflowCount(); overflow != 2 { + t.Errorf("Expected overflow count 2, got %d", overflow) + } + + // Verify count is still 3 - rejected paths must not be stored + if count := registry.Count(); count != 3 { + t.Errorf("Expected count 3, got %d", count) + } +} + +func TestPathRegistry_Contains(t *testing.T) { + registry := NewPathRegistry(3) + + registry.Add("/api/users") + registry.Add("/api/posts") + + if !registry.Contains("/api/users") { + t.Error("Expected /api/users to be in registry") + } + + if !registry.Contains("/api/posts") { + t.Error("Expected /api/posts to be in registry") + } + + if registry.Contains("/api/comments") { + t.Error("Expected /api/comments to NOT be in registry") + } +} + +func TestPathRegistry_Count(t *testing.T) { + registry := NewPathRegistry(5) + + if count := registry.Count(); count != 0 { + t.Errorf("Expected initial count 0, got %d", count) + } + + registry.Add("/api/users") + if count := registry.Count(); count != 1 { + t.Errorf("Expected count 1, got %d", count) + } + + registry.Add("/api/posts") + if count := registry.Count(); count != 2 { + t.Errorf("Expected count 2, got %d", count) + } + + // Add duplicate - count should not change + registry.Add("/api/users") + if count := registry.Count(); count != 2 { + t.Errorf("Expected count 2, got %d", count) + } +} diff --git a/testdata/config.valid.yaml b/testdata/config.valid.yaml index 656a8f8..b9a5f28 100644 --- a/testdata/config.valid.yaml +++ b/testdata/config.valid.yaml @@ -19,6 +19,7 @@ site: limits: max_paths: 1000 max_events_per_minute: 10000 + max_sources: 100 server: listen_addr: :8080