watchdog/internal/normalize/path.go
NotAShelf 0691e5ee34
internal: implement path normalization w/ configurable rules
Strips query strings and URL fragments, and prevents unbounded Prometheus
metric cardinality by normalizing paths like:

- `/users/12345/profile -> /users/:id/profile`
- `/page?utm_source=twitter -> /page`
- `/a/../b -> /b`

etc.

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: I72f2fa2452f4666567143d052b5716476a6a6964
2026-03-02 22:37:50 +03:00

121 lines
2.3 KiB
Go

package normalize
import (
"strings"
"notashelf.dev/watchdog/internal/config"
)
// maxPathLength is the longest path, in bytes, that Normalize will process.
// Paths longer than this are returned unchanged.
const maxPathLength = 2048
// PathNormalizer rewrites request paths into a canonical form according to
// the rules held in its config.PathConfig.
type PathNormalizer struct {
// cfg holds the normalization rules that Normalize consults.
cfg config.PathConfig
}
// NewPathNormalizer returns a PathNormalizer that applies the rules in cfg.
// The configuration is stored by value.
func NewPathNormalizer(cfg config.PathConfig) *PathNormalizer {
	normalizer := PathNormalizer{cfg: cfg}
	return &normalizer
}
// Normalize canonicalizes a request path so that equivalent URLs collapse
// into a single value. The steps are applied in this order:
//
//  1. A path longer than maxPathLength bytes is returned unchanged.
//  2. An empty path becomes "/".
//  3. Everything from the first '?' is dropped when cfg.StripQuery is set, and
//     everything from the first '#' when cfg.StripFragment is set.
//  4. Empty segments (from repeated slashes) and "." segments are removed;
//     ".." removes the preceding segment and is ignored at the root.
//  5. All-digit segments become ":id" when cfg.CollapseNumericSegments is set.
//  6. When cfg.MaxSegments is positive, only the first MaxSegments segments
//     are kept.
//  7. A trailing slash is dropped when cfg.NormalizeTrailingSlash is set.
//     Otherwise it is preserved if the path (after step 3) ended in "/",
//     the result is not the root, and step 6 did not truncate the path.
//
// The returned path always starts with "/".
func (n *PathNormalizer) Normalize(path string) string {
	// NOTE(review): over-long paths bypass normalization entirely, so they can
	// still reach callers verbatim. Confirm that callers bound or reject them
	// before using the result as a metric label.
	if len(path) > maxPathLength {
		return path
	}
	if path == "" {
		return "/"
	}

	if n.cfg.StripQuery {
		if idx := strings.IndexByte(path, '?'); idx != -1 {
			path = path[:idx]
		}
	}
	if n.cfg.StripFragment {
		if idx := strings.IndexByte(path, '#'); idx != -1 {
			path = path[:idx]
		}
	}

	// Remember the trailing slash now: the segment list below cannot represent
	// it, so it has to be re-applied explicitly when the configuration asks
	// for it to be kept.
	keepSlash := !n.cfg.NormalizeTrailingSlash && strings.HasSuffix(path, "/")

	// A missing leading slash needs no special handling: empty segments are
	// discarded below and the result is always rebuilt with a leading "/".
	parts := strings.Split(path, "/")

	// Filter in place. At most one segment is written per segment read, so the
	// write position never overtakes the read position.
	segments := parts[:0]
	for _, seg := range parts {
		switch seg {
		case "", ".":
			// Repeated slashes and current-directory markers carry no meaning.
		case "..":
			// Step up one level; at the root there is nothing to remove.
			if len(segments) > 0 {
				segments = segments[:len(segments)-1]
			}
		default:
			if n.cfg.CollapseNumericSegments && isNumeric(seg) {
				seg = ":id"
			}
			segments = append(segments, seg)
		}
	}

	if n.cfg.MaxSegments > 0 && len(segments) > n.cfg.MaxSegments {
		segments = segments[:n.cfg.MaxSegments]
		// The original final segment is gone, so its trailing slash no longer
		// applies to what remains.
		keepSlash = false
	}

	if len(segments) == 0 {
		return "/"
	}

	result := "/" + strings.Join(segments, "/")
	if keepSlash {
		result += "/"
	}
	return result
}
// isNumeric reports whether s is non-empty and made up solely of the ASCII
// digits '0' through '9'.
func isNumeric(s string) bool {
	for i := 0; i < len(s); i++ {
		if b := s[i]; b < '0' || b > '9' {
			return false
		}
	}
	// An empty string has no digits and is therefore not numeric.
	return len(s) > 0
}