Strips query strings and URL fragments, and prevents unbounded Prometheus metrics by normalizing paths, e.g.: - `/users/12345/profile -> /users/:id/profile` - `/page?utm_source=twitter -> /page` - `/a/../b -> /b` etc. Signed-off-by: NotAShelf <raf@notashelf.dev> Change-Id: I72f2fa2452f4666567143d052b5716476a6a6964
121 lines
2.3 KiB
Go
121 lines
2.3 KiB
Go
package normalize
|
|
|
|
import (
|
|
"strings"
|
|
|
|
"notashelf.dev/watchdog/internal/config"
|
|
)
|
|
|
|
// maxPathLength is the largest input, in bytes, that Normalize will process;
// longer paths are returned unchanged.
const maxPathLength = 2048
|
|
|
|
type PathNormalizer struct {
|
|
cfg config.PathConfig
|
|
}
|
|
|
|
func NewPathNormalizer(cfg config.PathConfig) *PathNormalizer {
|
|
return &PathNormalizer{cfg: cfg}
|
|
}
|
|
|
|
func (n *PathNormalizer) Normalize(path string) string {
|
|
// Return as-is if path is too long
|
|
if len(path) > maxPathLength {
|
|
return path
|
|
}
|
|
|
|
if path == "" {
|
|
return "/"
|
|
}
|
|
|
|
// Strip query string
|
|
if n.cfg.StripQuery {
|
|
if idx := strings.IndexByte(path, '?'); idx != -1 {
|
|
path = path[:idx]
|
|
}
|
|
}
|
|
|
|
// Strip fragment
|
|
if n.cfg.StripFragment {
|
|
if idx := strings.IndexByte(path, '#'); idx != -1 {
|
|
path = path[:idx]
|
|
}
|
|
}
|
|
|
|
// Ensure leading slash
|
|
if !strings.HasPrefix(path, "/") {
|
|
path = "/" + path
|
|
}
|
|
|
|
// Split into segments, first element is *always* empty for paths starting with '/'
|
|
segments := strings.Split(path, "/")
|
|
if len(segments) > 0 && segments[0] == "" {
|
|
segments = segments[1:]
|
|
}
|
|
|
|
// Remove empty segments (from double slashes)
|
|
filtered := make([]string, 0, len(segments))
|
|
for _, seg := range segments {
|
|
if seg != "" {
|
|
filtered = append(filtered, seg)
|
|
}
|
|
}
|
|
segments = filtered
|
|
|
|
// Resolve . and .. segments
|
|
resolved := make([]string, 0, len(segments))
|
|
for _, seg := range segments {
|
|
if seg == "." {
|
|
// Skip current directory
|
|
continue
|
|
} else if seg == ".." {
|
|
// Go up one level if possible
|
|
if len(resolved) > 0 {
|
|
resolved = resolved[:len(resolved)-1]
|
|
}
|
|
// If already at root, skip ..
|
|
} else {
|
|
resolved = append(resolved, seg)
|
|
}
|
|
}
|
|
segments = resolved
|
|
|
|
// Collapse numeric segments
|
|
if n.cfg.CollapseNumericSegments {
|
|
for i, seg := range segments {
|
|
if isNumeric(seg) {
|
|
segments[i] = ":id"
|
|
}
|
|
}
|
|
}
|
|
|
|
// Limit segments
|
|
if n.cfg.MaxSegments > 0 && len(segments) > n.cfg.MaxSegments {
|
|
segments = segments[:n.cfg.MaxSegments]
|
|
}
|
|
|
|
// Reconstruct path
|
|
var result string
|
|
if len(segments) == 0 {
|
|
result = "/"
|
|
} else {
|
|
result = "/" + strings.Join(segments, "/")
|
|
}
|
|
|
|
// Strip trailing slash if configured (except root)
|
|
if n.cfg.NormalizeTrailingSlash && result != "/" && strings.HasSuffix(result, "/") {
|
|
result = strings.TrimSuffix(result, "/")
|
|
}
|
|
|
|
return result
|
|
}
|
|
|
|
// isNumeric reports whether s is non-empty and made up exclusively of the
// ASCII digits '0' through '9'.
func isNumeric(s string) bool {
	for i := 0; i < len(s); i++ {
		if b := s[i]; b < '0' || b > '9' {
			return false
		}
	}
	// An empty string has no offending byte but is still not a number.
	return len(s) > 0
}
|