internal/normalize: harden against possible attacks; optimize registry

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: Iaf89cda3c480d6a8371e5f146ee95fcf6a6a6964
This commit is contained in:
raf 2026-03-01 13:08:31 +03:00
commit ffb4ab2295
Signed by: NotAShelf
GPG key ID: 29D95B64378DB4BF
3 changed files with 24 additions and 11 deletions

View file

@ -6,20 +6,22 @@ import (
"notashelf.dev/watchdog/internal/config"
)
const maxPathLength = 2048
type PathNormalizer struct {
cfg config.PathConfig
cfg config.PathConfig
maxLength int
}
func NewPathNormalizer(cfg config.PathConfig) *PathNormalizer {
return &PathNormalizer{cfg: cfg}
return &PathNormalizer{
cfg: cfg,
maxLength: 2048,
}
}
func (n *PathNormalizer) Normalize(path string) string {
// Return as-is if path is too long
if len(path) > maxPathLength {
return path
// Reject over-long paths by collapsing them to "/" instead of returning them unnormalized
if len(path) > n.maxLength {
return "/"
}
if path == "" {

View file

@ -89,8 +89,9 @@ func ExtractReferrerDomain(referrer, siteDomain string) string {
// - "news.ycombinator.com" -> "ycombinator.com"
eTLDPlus1, err := publicsuffix.EffectiveTLDPlusOne(hostname)
if err != nil {
// If public suffix lookup fails, use hostname as-is
return hostname
// If public suffix lookup fails (malformed/unknown TLD), return "other"
// to prevent unbounded cardinality from malicious referrers
return "other"
}
return eTLDPlus1

View file

@ -17,16 +17,26 @@ func NewReferrerRegistry(maxSources int) *ReferrerRegistry {
}
}
// Attempt to add a referrer source to the registry. Returns the source to use ("other" if rejected).
// Add attempts to add a referrer source to the registry.
// It returns the source to use ("other" if rejected).
func (r *ReferrerRegistry) Add(source string) string {
if source == "direct" || source == "internal" {
return source
}
// Fast path: check with read lock first
r.mu.RLock()
if _, exists := r.sources[source]; exists {
r.mu.RUnlock()
return source
}
r.mu.RUnlock()
// Slow path: acquire write lock to add
r.mu.Lock()
defer r.mu.Unlock()
// Already exists
// Double-check after acquiring the write lock: another goroutine may have added it in the meantime
if _, exists := r.sources[source]; exists {
return source
}