internal/normalize: harden against possible attacks; optimize registry

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: Iaf89cda3c480d6a8371e5f146ee95fcf6a6a6964
This commit is contained in:
raf 2026-03-01 13:08:31 +03:00
commit ffb4ab2295
Signed by: NotAShelf
GPG key ID: 29D95B64378DB4BF
3 changed files with 24 additions and 11 deletions

View file

@ -6,20 +6,22 @@ import (
"notashelf.dev/watchdog/internal/config" "notashelf.dev/watchdog/internal/config"
) )
const maxPathLength = 2048
type PathNormalizer struct { type PathNormalizer struct {
cfg config.PathConfig cfg config.PathConfig
maxLength int
} }
func NewPathNormalizer(cfg config.PathConfig) *PathNormalizer { func NewPathNormalizer(cfg config.PathConfig) *PathNormalizer {
return &PathNormalizer{cfg: cfg} return &PathNormalizer{
cfg: cfg,
maxLength: 2048,
}
} }
func (n *PathNormalizer) Normalize(path string) string { func (n *PathNormalizer) Normalize(path string) string {
// Return as-is if path is too long // Reject paths that are too long; don't bypass normalization
if len(path) > maxPathLength { if len(path) > n.maxLength {
return path return "/"
} }
if path == "" { if path == "" {

View file

@ -89,8 +89,9 @@ func ExtractReferrerDomain(referrer, siteDomain string) string {
// - "news.ycombinator.com" -> "ycombinator.com" // - "news.ycombinator.com" -> "ycombinator.com"
eTLDPlus1, err := publicsuffix.EffectiveTLDPlusOne(hostname) eTLDPlus1, err := publicsuffix.EffectiveTLDPlusOne(hostname)
if err != nil { if err != nil {
// If public suffix lookup fails, use hostname as-is // If public suffix lookup fails (malformed/unknown TLD), return "other"
return hostname // to prevent unbounded cardinality from malicious referrers
return "other"
} }
return eTLDPlus1 return eTLDPlus1

View file

@ -17,16 +17,26 @@ func NewReferrerRegistry(maxSources int) *ReferrerRegistry {
} }
} }
// Attempt to add a referrer source to the registry. Returns the source to use ("other" if rejected). // Attempt to add a referrer source to the registry.
// Returns the source to use ("other" if rejected).
func (r *ReferrerRegistry) Add(source string) string { func (r *ReferrerRegistry) Add(source string) string {
if source == "direct" || source == "internal" { if source == "direct" || source == "internal" {
return source return source
} }
// Fast path: check with read lock first
r.mu.RLock()
if _, exists := r.sources[source]; exists {
r.mu.RUnlock()
return source
}
r.mu.RUnlock()
// Slow path: acquire write lock to add
r.mu.Lock() r.mu.Lock()
defer r.mu.Unlock() defer r.mu.Unlock()
// Already exists // Double-check after acquiring the write lock: another goroutine may have added it in the meantime
if _, exists := r.sources[source]; exists { if _, exists := r.sources[source]; exists {
return source return source
} }