internal/normalize: harden against possible attacks; optimize registry
Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: Iaf89cda3c480d6a8371e5f146ee95fcf6a6a6964
This commit is contained in:
parent
b2256183e1
commit
ffb4ab2295
3 changed files with 24 additions and 11 deletions
|
|
@ -6,20 +6,22 @@ import (
|
||||||
"notashelf.dev/watchdog/internal/config"
|
"notashelf.dev/watchdog/internal/config"
|
||||||
)
|
)
|
||||||
|
|
||||||
const maxPathLength = 2048
|
|
||||||
|
|
||||||
type PathNormalizer struct {
|
type PathNormalizer struct {
|
||||||
cfg config.PathConfig
|
cfg config.PathConfig
|
||||||
|
maxLength int
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewPathNormalizer(cfg config.PathConfig) *PathNormalizer {
|
func NewPathNormalizer(cfg config.PathConfig) *PathNormalizer {
|
||||||
return &PathNormalizer{cfg: cfg}
|
return &PathNormalizer{
|
||||||
|
cfg: cfg,
|
||||||
|
maxLength: 2048,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (n *PathNormalizer) Normalize(path string) string {
|
func (n *PathNormalizer) Normalize(path string) string {
|
||||||
// Return as-is if path is too long
|
// Reject paths that are too long; don't bypass normalization
|
||||||
if len(path) > maxPathLength {
|
if len(path) > n.maxLength {
|
||||||
return path
|
return "/"
|
||||||
}
|
}
|
||||||
|
|
||||||
if path == "" {
|
if path == "" {
|
||||||
|
|
|
||||||
|
|
@ -89,8 +89,9 @@ func ExtractReferrerDomain(referrer, siteDomain string) string {
|
||||||
// - "news.ycombinator.com" -> "ycombinator.com"
|
// - "news.ycombinator.com" -> "ycombinator.com"
|
||||||
eTLDPlus1, err := publicsuffix.EffectiveTLDPlusOne(hostname)
|
eTLDPlus1, err := publicsuffix.EffectiveTLDPlusOne(hostname)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// If public suffix lookup fails, use hostname as-is
|
// If public suffix lookup fails (malformed/unknown TLD), return "other"
|
||||||
return hostname
|
// to prevent unbounded cardinality from malicious referrers
|
||||||
|
return "other"
|
||||||
}
|
}
|
||||||
|
|
||||||
return eTLDPlus1
|
return eTLDPlus1
|
||||||
|
|
|
||||||
|
|
@ -17,16 +17,26 @@ func NewReferrerRegistry(maxSources int) *ReferrerRegistry {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Attempt to add a referrer source to the registry. Returns the source to use ("other" if rejected).
|
// Attempt to add a referrer source to the registry.
|
||||||
|
// Returns the source to use ("other" if rejected).
|
||||||
func (r *ReferrerRegistry) Add(source string) string {
|
func (r *ReferrerRegistry) Add(source string) string {
|
||||||
if source == "direct" || source == "internal" {
|
if source == "direct" || source == "internal" {
|
||||||
return source
|
return source
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fast path: check with read lock first
|
||||||
|
r.mu.RLock()
|
||||||
|
if _, exists := r.sources[source]; exists {
|
||||||
|
r.mu.RUnlock()
|
||||||
|
return source
|
||||||
|
}
|
||||||
|
r.mu.RUnlock()
|
||||||
|
|
||||||
|
// Slow path: acquire write lock to add
|
||||||
r.mu.Lock()
|
r.mu.Lock()
|
||||||
defer r.mu.Unlock()
|
defer r.mu.Unlock()
|
||||||
|
|
||||||
// Already exists
|
// Double-check after acquiring the write lock: another goroutine may have added it in the meantime
|
||||||
if _, exists := r.sources[source]; exists {
|
if _, exists := r.sources[source]; exists {
|
||||||
return source
|
return source
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue