internal/normalize: harden against possible attacks; optimize registry
Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: Iaf89cda3c480d6a8371e5f146ee95fcf6a6a6964
This commit is contained in:
parent
b2256183e1
commit
ffb4ab2295
3 changed files with 24 additions and 11 deletions
|
|
@ -6,20 +6,22 @@ import (
|
|||
"notashelf.dev/watchdog/internal/config"
|
||||
)
|
||||
|
||||
const maxPathLength = 2048
|
||||
|
||||
type PathNormalizer struct {
|
||||
cfg config.PathConfig
|
||||
cfg config.PathConfig
|
||||
maxLength int
|
||||
}
|
||||
|
||||
func NewPathNormalizer(cfg config.PathConfig) *PathNormalizer {
|
||||
return &PathNormalizer{cfg: cfg}
|
||||
return &PathNormalizer{
|
||||
cfg: cfg,
|
||||
maxLength: 2048,
|
||||
}
|
||||
}
|
||||
|
||||
func (n *PathNormalizer) Normalize(path string) string {
|
||||
// Return as-is if path is too long
|
||||
if len(path) > maxPathLength {
|
||||
return path
|
||||
// Reject paths that are too long; don't bypass normalization
|
||||
if len(path) > n.maxLength {
|
||||
return "/"
|
||||
}
|
||||
|
||||
if path == "" {
|
||||
|
|
|
|||
|
|
@ -89,8 +89,9 @@ func ExtractReferrerDomain(referrer, siteDomain string) string {
|
|||
// - "news.ycombinator.com" -> "ycombinator.com"
|
||||
eTLDPlus1, err := publicsuffix.EffectiveTLDPlusOne(hostname)
|
||||
if err != nil {
|
||||
// If public suffix lookup fails, use hostname as-is
|
||||
return hostname
|
||||
// If public suffix lookup fails (malformed/unknown TLD), return "other"
|
||||
// to prevent unbounded cardinality from malicious referrers
|
||||
return "other"
|
||||
}
|
||||
|
||||
return eTLDPlus1
|
||||
|
|
|
|||
|
|
@ -17,16 +17,26 @@ func NewReferrerRegistry(maxSources int) *ReferrerRegistry {
|
|||
}
|
||||
}
|
||||
|
||||
// Attempt to add a referrer source to the registry. Returns the source to use ("other" if rejected).
|
||||
// Attempt to add a referrer source to the registry.
|
||||
// Returns the source to use ("other" if rejected).
|
||||
func (r *ReferrerRegistry) Add(source string) string {
|
||||
if source == "direct" || source == "internal" {
|
||||
return source
|
||||
}
|
||||
|
||||
// Fast path: check with read lock first
|
||||
r.mu.RLock()
|
||||
if _, exists := r.sources[source]; exists {
|
||||
r.mu.RUnlock()
|
||||
return source
|
||||
}
|
||||
r.mu.RUnlock()
|
||||
|
||||
// Slow path: acquire write lock to add
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
|
||||
// Already exists
|
||||
// Double-check after acquiring write lock, another goroutine might have added it beforehand
|
||||
if _, exists := r.sources[source]; exists {
|
||||
return source
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue