mirror of
https://github.com/NotAShelf/watchdog.git
synced 2026-03-07 22:06:00 +00:00
Compare commits
No commits in common. "df06ed38bf540f5a626db04bc570050eac3286f6" and "f46697bd21ca9b398b06ca651505f89b55249b1b" have entirely different histories.
df06ed38bf
...
f46697bd21
16 changed files with 62 additions and 911 deletions
13
README.md
13
README.md
|
|
@ -75,7 +75,7 @@ installation mechanism.
|
||||||
$ go build -o watchdog .
|
$ go build -o watchdog .
|
||||||
|
|
||||||
# Run
|
# Run
|
||||||
$ ./watchdog --config config.yaml
|
$ ./watchdog -config config.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
## Configuration
|
## Configuration
|
||||||
|
|
@ -207,20 +207,11 @@ While not final, some of the metrics collected are as follows:
|
||||||
- `web_custom_events_total{event}` - Custom event counts
|
- `web_custom_events_total{event}` - Custom event counts
|
||||||
- `web_daily_unique_visitors` - Estimated unique visitors (HyperLogLog)
|
- `web_daily_unique_visitors` - Estimated unique visitors (HyperLogLog)
|
||||||
|
|
||||||
**Cardinality metrics:**
|
**Health metrics:**
|
||||||
|
|
||||||
- `web_path_overflow_total` - Paths rejected due to cardinality limit
|
- `web_path_overflow_total` - Paths rejected due to cardinality limit
|
||||||
- `web_referrer_overflow_total` - Referrers rejected due to limit
|
- `web_referrer_overflow_total` - Referrers rejected due to limit
|
||||||
- `web_event_overflow_total` - Custom events rejected due to limit
|
- `web_event_overflow_total` - Custom events rejected due to limit
|
||||||
- `web_blocked_requests_total{reason}` - File server requests blocked by
|
|
||||||
security filters
|
|
||||||
|
|
||||||
**Process metrics:**
|
|
||||||
|
|
||||||
- `watchdog_build_info{version,commit,build_date}` - Build metadata
|
|
||||||
- `watchdog_start_time_seconds` - Unix timestamp of process start
|
|
||||||
- `go_*` - Go runtime metrics (goroutines, GC, memory)
|
|
||||||
- `process_*` - OS process metrics (CPU, RSS, file descriptors)
|
|
||||||
|
|
||||||
## Privacy
|
## Privacy
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -11,11 +11,8 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
cfgFile string
|
cfgFile string
|
||||||
cfg *config.Config
|
cfg *config.Config
|
||||||
version string
|
|
||||||
commit string
|
|
||||||
buildDate string
|
|
||||||
)
|
)
|
||||||
|
|
||||||
var rootCmd = &cobra.Command{
|
var rootCmd = &cobra.Command{
|
||||||
|
|
@ -87,8 +84,7 @@ func initConfig() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func Main(v, c, bd string) {
|
func Main() {
|
||||||
version, commit, buildDate = v, c, bd
|
|
||||||
if err := rootCmd.Execute(); err != nil {
|
if err := rootCmd.Execute(); err != nil {
|
||||||
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
|
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
|
|
|
||||||
|
|
@ -11,14 +11,13 @@ import (
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
"syscall"
|
"syscall"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/prometheus/client_golang/prometheus"
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||||
"notashelf.dev/watchdog/internal/aggregate"
|
"notashelf.dev/watchdog/internal/aggregate"
|
||||||
"notashelf.dev/watchdog/internal/api"
|
"notashelf.dev/watchdog/internal/api"
|
||||||
"notashelf.dev/watchdog/internal/config"
|
"notashelf.dev/watchdog/internal/config"
|
||||||
"notashelf.dev/watchdog/internal/health"
|
|
||||||
"notashelf.dev/watchdog/internal/limits"
|
|
||||||
"notashelf.dev/watchdog/internal/normalize"
|
"notashelf.dev/watchdog/internal/normalize"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -32,15 +31,6 @@ func Run(cfg *config.Config) error {
|
||||||
eventRegistry := aggregate.NewCustomEventRegistry(cfg.Limits.MaxCustomEvents)
|
eventRegistry := aggregate.NewCustomEventRegistry(cfg.Limits.MaxCustomEvents)
|
||||||
metricsAgg := aggregate.NewMetricsAggregator(pathRegistry, eventRegistry, cfg)
|
metricsAgg := aggregate.NewMetricsAggregator(pathRegistry, eventRegistry, cfg)
|
||||||
|
|
||||||
// Metric for tracking blocked file requests (scrapers/bots)
|
|
||||||
blockedRequests := prometheus.NewCounterVec(
|
|
||||||
prometheus.CounterOpts{
|
|
||||||
Name: "web_blocked_requests_total",
|
|
||||||
Help: "File server requests blocked by security filters",
|
|
||||||
},
|
|
||||||
[]string{"reason"},
|
|
||||||
)
|
|
||||||
|
|
||||||
// Load HLL state from previous run if it exists
|
// Load HLL state from previous run if it exists
|
||||||
if cfg.Site.SaltRotation != "" {
|
if cfg.Site.SaltRotation != "" {
|
||||||
log.Println("HLL state persistence enabled")
|
log.Println("HLL state persistence enabled")
|
||||||
|
|
@ -54,13 +44,6 @@ func Run(cfg *config.Config) error {
|
||||||
// Register Prometheus metrics
|
// Register Prometheus metrics
|
||||||
promRegistry := prometheus.NewRegistry()
|
promRegistry := prometheus.NewRegistry()
|
||||||
metricsAgg.MustRegister(promRegistry)
|
metricsAgg.MustRegister(promRegistry)
|
||||||
promRegistry.MustRegister(blockedRequests)
|
|
||||||
|
|
||||||
// Register health and runtime metrics
|
|
||||||
healthCollector := health.NewCollector(version, commit, buildDate)
|
|
||||||
if err := healthCollector.Register(promRegistry); err != nil {
|
|
||||||
return fmt.Errorf("failed to register health metrics: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create HTTP handlers
|
// Create HTTP handlers
|
||||||
ingestionHandler := api.NewIngestionHandler(
|
ingestionHandler := api.NewIngestionHandler(
|
||||||
|
|
@ -101,16 +84,16 @@ func Run(cfg *config.Config) error {
|
||||||
// Serve whitelisted static files from /web/ if the directory exists
|
// Serve whitelisted static files from /web/ if the directory exists
|
||||||
if info, err := os.Stat("web"); err == nil && info.IsDir() {
|
if info, err := os.Stat("web"); err == nil && info.IsDir() {
|
||||||
log.Println("Serving static files from /web/")
|
log.Println("Serving static files from /web/")
|
||||||
mux.Handle("/web/", safeFileServer("web", blockedRequests))
|
mux.Handle("/web/", safeFileServer("web"))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create HTTP server with timeouts
|
// Create HTTP server with timeouts
|
||||||
srv := &http.Server{
|
srv := &http.Server{
|
||||||
Addr: cfg.Server.ListenAddr,
|
Addr: cfg.Server.ListenAddr,
|
||||||
Handler: mux,
|
Handler: mux,
|
||||||
ReadTimeout: limits.HTTPReadTimeout,
|
ReadTimeout: 10 * time.Second,
|
||||||
WriteTimeout: limits.HTTPWriteTimeout,
|
WriteTimeout: 10 * time.Second,
|
||||||
IdleTimeout: limits.HTTPIdleTimeout,
|
IdleTimeout: 60 * time.Second,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Start server in goroutine
|
// Start server in goroutine
|
||||||
|
|
@ -132,8 +115,8 @@ func Run(cfg *config.Config) error {
|
||||||
case sig := <-shutdown:
|
case sig := <-shutdown:
|
||||||
log.Printf("Received signal: %v, starting graceful shutdown", sig)
|
log.Printf("Received signal: %v, starting graceful shutdown", sig)
|
||||||
|
|
||||||
// Give outstanding requests time to complete
|
// Give outstanding requests 30 seconds to complete
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), limits.ShutdownTimeout)
|
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
// Shutdown metrics aggregator.
|
// Shutdown metrics aggregator.
|
||||||
|
|
@ -170,7 +153,7 @@ func basicAuth(next http.Handler, username, password string) http.Handler {
|
||||||
// Creates a file server that only serves whitelisted files. Blocks dotfiles, .git, .env, etc.
|
// Creates a file server that only serves whitelisted files. Blocks dotfiles, .git, .env, etc.
|
||||||
// TODO: I need to hook this up to eris somehow so I can just forward the paths that are being
|
// TODO: I need to hook this up to eris somehow so I can just forward the paths that are being
|
||||||
// scanned despite not being on a whitelist. Would be a good way of detecting scrapers, maybe.
|
// scanned despite not being on a whitelist. Would be a good way of detecting scrapers, maybe.
|
||||||
func safeFileServer(root string, blockedRequests *prometheus.CounterVec) http.Handler {
|
func safeFileServer(root string) http.Handler {
|
||||||
fs := http.FileServer(http.Dir(root))
|
fs := http.FileServer(http.Dir(root))
|
||||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
// Clean the path
|
// Clean the path
|
||||||
|
|
@ -178,8 +161,6 @@ func safeFileServer(root string, blockedRequests *prometheus.CounterVec) http.Ha
|
||||||
|
|
||||||
// Block directory listings
|
// Block directory listings
|
||||||
if strings.HasSuffix(path, "/") {
|
if strings.HasSuffix(path, "/") {
|
||||||
blockedRequests.WithLabelValues("directory_listing").Inc()
|
|
||||||
log.Printf("Blocked directory listing attempt: %s from %s", path, r.RemoteAddr)
|
|
||||||
http.NotFound(w, r)
|
http.NotFound(w, r)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
@ -187,8 +168,6 @@ func safeFileServer(root string, blockedRequests *prometheus.CounterVec) http.Ha
|
||||||
// Block dotfiles and sensitive files
|
// Block dotfiles and sensitive files
|
||||||
for segment := range strings.SplitSeq(path, "/") {
|
for segment := range strings.SplitSeq(path, "/") {
|
||||||
if strings.HasPrefix(segment, ".") {
|
if strings.HasPrefix(segment, ".") {
|
||||||
blockedRequests.WithLabelValues("dotfile").Inc()
|
|
||||||
log.Printf("Blocked dotfile access: %s from %s", path, r.RemoteAddr)
|
|
||||||
http.NotFound(w, r)
|
http.NotFound(w, r)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
@ -198,8 +177,6 @@ func safeFileServer(root string, blockedRequests *prometheus.CounterVec) http.Ha
|
||||||
strings.Contains(lower, "config") ||
|
strings.Contains(lower, "config") ||
|
||||||
strings.HasSuffix(lower, ".bak") ||
|
strings.HasSuffix(lower, ".bak") ||
|
||||||
strings.HasSuffix(lower, "~") {
|
strings.HasSuffix(lower, "~") {
|
||||||
blockedRequests.WithLabelValues("sensitive_file").Inc()
|
|
||||||
log.Printf("Blocked sensitive file access: %s from %s", path, r.RemoteAddr)
|
|
||||||
http.NotFound(w, r)
|
http.NotFound(w, r)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
@ -208,8 +185,6 @@ func safeFileServer(root string, blockedRequests *prometheus.CounterVec) http.Ha
|
||||||
// Only serve .js, .html, .css files
|
// Only serve .js, .html, .css files
|
||||||
ext := strings.ToLower(filepath.Ext(path))
|
ext := strings.ToLower(filepath.Ext(path))
|
||||||
if ext != ".js" && ext != ".html" && ext != ".css" {
|
if ext != ".js" && ext != ".html" && ext != ".css" {
|
||||||
blockedRequests.WithLabelValues("invalid_extension").Inc()
|
|
||||||
log.Printf("Blocked invalid extension: %s from %s", path, r.RemoteAddr)
|
|
||||||
http.NotFound(w, r)
|
http.NotFound(w, r)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,237 +0,0 @@
|
||||||
# Configuration
|
|
||||||
|
|
||||||
Watchdog supports multiple configuration sources with the following precedence
|
|
||||||
(highest to lowest):
|
|
||||||
|
|
||||||
1. **Command-line flags**
|
|
||||||
2. **Environment variables**
|
|
||||||
3. **Configuration file**
|
|
||||||
4. **Defaults**
|
|
||||||
|
|
||||||
## Configuration File
|
|
||||||
|
|
||||||
The primary configuration method is via YAML file. By default, Watchdog looks
|
|
||||||
for:
|
|
||||||
|
|
||||||
- `./config.yaml` (current directory)
|
|
||||||
- `/etc/watchdog/config.yaml` (system-wide)
|
|
||||||
|
|
||||||
Specify a custom location:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Provide your configuration YAML file with --config
|
|
||||||
$ watchdog --config /path/to/config.yaml
|
|
||||||
```
|
|
||||||
|
|
||||||
See [config.example.yaml](../config.example.yaml) for all available options.
|
|
||||||
|
|
||||||
## Environment Variables
|
|
||||||
|
|
||||||
All configuration options can be set via environment variables with the
|
|
||||||
`WATCHDOG_` prefix.
|
|
||||||
|
|
||||||
Nested fields use underscore separators. For example:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# site.domains
|
|
||||||
$ export WATCHDOG_SITE_DOMAINS="example.com,blog.example.com"
|
|
||||||
|
|
||||||
# server.listen_addr
|
|
||||||
$ export WATCHDOG_SERVER_LISTEN_ADDR="127.0.0.1:8080"
|
|
||||||
|
|
||||||
# site.collect.pageviews
|
|
||||||
$ export WATCHDOG_SITE_COLLECT_PAGEVIEWS=true
|
|
||||||
|
|
||||||
# limits.max_paths
|
|
||||||
$ export WATCHDOG_LIMITS_MAX_PATHS=10000
|
|
||||||
```
|
|
||||||
|
|
||||||
### Common Environment Variables
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Server
|
|
||||||
WATCHDOG_SERVER_LISTEN_ADDR="127.0.0.1:8080"
|
|
||||||
WATCHDOG_SERVER_METRICS_PATH="/metrics"
|
|
||||||
WATCHDOG_SERVER_INGESTION_PATH="/api/event"
|
|
||||||
WATCHDOG_SERVER_STATE_PATH="/var/lib/watchdog/hll.state"
|
|
||||||
|
|
||||||
# Site
|
|
||||||
WATCHDOG_SITE_DOMAINS="example.com" # comma-separated for multiple
|
|
||||||
WATCHDOG_SITE_SALT_ROTATION="daily"
|
|
||||||
WATCHDOG_SITE_SAMPLING=1.0
|
|
||||||
|
|
||||||
# Collection
|
|
||||||
WATCHDOG_SITE_COLLECT_PAGEVIEWS=true
|
|
||||||
WATCHDOG_SITE_COLLECT_COUNTRY=true
|
|
||||||
WATCHDOG_SITE_COLLECT_DEVICE=true
|
|
||||||
WATCHDOG_SITE_COLLECT_REFERRER="domain"
|
|
||||||
WATCHDOG_SITE_COLLECT_DOMAIN=false
|
|
||||||
|
|
||||||
# Limits
|
|
||||||
WATCHDOG_LIMITS_MAX_PATHS=10000
|
|
||||||
WATCHDOG_LIMITS_MAX_SOURCES=500
|
|
||||||
WATCHDOG_LIMITS_MAX_CUSTOM_EVENTS=100
|
|
||||||
WATCHDOG_LIMITS_MAX_EVENTS_PER_MINUTE=10000
|
|
||||||
|
|
||||||
# Security
|
|
||||||
WATCHDOG_SECURITY_CORS_ENABLED=false
|
|
||||||
WATCHDOG_SECURITY_METRICS_AUTH_ENABLED=false
|
|
||||||
WATCHDOG_SECURITY_METRICS_AUTH_USERNAME="admin"
|
|
||||||
WATCHDOG_SECURITY_METRICS_AUTH_PASSWORD="changeme"
|
|
||||||
```
|
|
||||||
|
|
||||||
## Command-Line Flags
|
|
||||||
|
|
||||||
Command-line flags override both config file and environment variables:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Override server address
|
|
||||||
watchdog --listen-addr :9090
|
|
||||||
|
|
||||||
# Override metrics path
|
|
||||||
watchdog --metrics-path /prometheus/metrics
|
|
||||||
|
|
||||||
# Override ingestion path
|
|
||||||
watchdog --ingestion-path /api/v1/event
|
|
||||||
|
|
||||||
# Combine multiple overrides
|
|
||||||
watchdog --config prod.yaml --listen-addr :9090 --metrics-path /metrics
|
|
||||||
```
|
|
||||||
|
|
||||||
Available flags:
|
|
||||||
|
|
||||||
- `--config string` - Path to config file
|
|
||||||
- `--listen-addr string` - Server listen address
|
|
||||||
- `--metrics-path string` - Metrics endpoint path
|
|
||||||
- `--ingestion-path string` - Ingestion endpoint path
|
|
||||||
|
|
||||||
## Configuration Precedence Example
|
|
||||||
|
|
||||||
Given:
|
|
||||||
|
|
||||||
**config.yaml:**
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
server:
|
|
||||||
listen_addr: ":8080"
|
|
||||||
metrics_path: "/metrics"
|
|
||||||
```
|
|
||||||
|
|
||||||
**Environment:**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
export WATCHDOG_SERVER_LISTEN_ADDR=":9090"
|
|
||||||
```
|
|
||||||
|
|
||||||
**Command:**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
watchdog --metrics-path "/prometheus/metrics"
|
|
||||||
```
|
|
||||||
|
|
||||||
**Result:**
|
|
||||||
|
|
||||||
- `listen_addr`: `:9090` (from environment variable)
|
|
||||||
- `metrics_path`: `/prometheus/metrics` (from CLI flag)
|
|
||||||
|
|
||||||
## Systemd Integration
|
|
||||||
|
|
||||||
Environment variables work seamlessly with systemd:
|
|
||||||
|
|
||||||
```ini
|
|
||||||
[Service]
|
|
||||||
Environment="WATCHDOG_SERVER_LISTEN_ADDR=127.0.0.1:8080"
|
|
||||||
Environment="WATCHDOG_SITE_DOMAINS=example.com"
|
|
||||||
Environment="WATCHDOG_LIMITS_MAX_PATHS=10000"
|
|
||||||
ExecStart=/usr/local/bin/watchdog --config /etc/watchdog/config.yaml
|
|
||||||
```
|
|
||||||
|
|
||||||
Or use `EnvironmentFile`:
|
|
||||||
|
|
||||||
```ini
|
|
||||||
[Service]
|
|
||||||
EnvironmentFile=/etc/watchdog/env
|
|
||||||
ExecStart=/usr/local/bin/watchdog
|
|
||||||
```
|
|
||||||
|
|
||||||
**/etc/watchdog/env:**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
WATCHDOG_SERVER_LISTEN_ADDR=127.0.0.1:8080
|
|
||||||
WATCHDOG_SITE_DOMAINS=example.com
|
|
||||||
WATCHDOG_LIMITS_MAX_PATHS=10000
|
|
||||||
```
|
|
||||||
|
|
||||||
## NixOS Integration
|
|
||||||
|
|
||||||
NixOS configuration automatically converts to the correct format:
|
|
||||||
|
|
||||||
```nix
|
|
||||||
{
|
|
||||||
services.watchdog = {
|
|
||||||
enable = true;
|
|
||||||
settings = {
|
|
||||||
site.domains = [ "example.com" ];
|
|
||||||
server.listen_addr = "127.0.0.1:8080";
|
|
||||||
limits.max_paths = 10000;
|
|
||||||
};
|
|
||||||
};
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
This is equivalent to setting environment variables or using a config file.
|
|
||||||
|
|
||||||
## Validation
|
|
||||||
|
|
||||||
Configuration is validated on startup. Invalid values will cause Watchdog to
|
|
||||||
exit with an error:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
$ watchdog
|
|
||||||
Error: config validation failed: site.domains is required
|
|
||||||
```
|
|
||||||
|
|
||||||
Common validation errors:
|
|
||||||
|
|
||||||
- `site.domains is required` - No domains configured
|
|
||||||
- `limits.max_paths must be greater than 0` - Invalid cardinality limit
|
|
||||||
- `site.collect.referrer must be 'off', 'domain', or 'url'` - Invalid referrer
|
|
||||||
mode
|
|
||||||
- `site.sampling must be between 0.0 and 1.0` - Invalid sampling rate
|
|
||||||
|
|
||||||
## Best Practices
|
|
||||||
|
|
||||||
1. **Use config file for base configuration** - Easier to version control and
|
|
||||||
review
|
|
||||||
2. **Use environment variables for secrets** - Don't commit passwords to config
|
|
||||||
files
|
|
||||||
3. **Use CLI flags for testing/overrides** - Quick temporary changes without
|
|
||||||
editing files
|
|
||||||
|
|
||||||
Example hybrid approach:
|
|
||||||
|
|
||||||
**config.yaml:**
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
site:
|
|
||||||
domains:
|
|
||||||
- example.com
|
|
||||||
collect:
|
|
||||||
pageviews: true
|
|
||||||
device: true
|
|
||||||
|
|
||||||
limits:
|
|
||||||
max_paths: 10000
|
|
||||||
```
|
|
||||||
|
|
||||||
**Environment (secrets):**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
export WATCHDOG_SECURITY_METRICS_AUTH_PASSWORD="$SECRET_PASSWORD"
|
|
||||||
```
|
|
||||||
|
|
||||||
**CLI (testing):**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
watchdog --listen-addr :9090 # Test on different port
|
|
||||||
```
|
|
||||||
|
|
@ -1,300 +0,0 @@
|
||||||
# Observability Setup
|
|
||||||
|
|
||||||
Watchdog exposes Prometheus-formatted metrics at `/metrics`. You need a
|
|
||||||
time-series database to scrape and store these metrics, then visualize them in
|
|
||||||
Grafana.
|
|
||||||
|
|
||||||
> [!IMPORTANT]
|
|
||||||
>
|
|
||||||
> **Why you need Prometheus:**
|
|
||||||
>
|
|
||||||
> - Watchdog exposes _current state_ (counters, gauges)
|
|
||||||
> - Prometheus _scrapes periodically_ and _stores time-series data_
|
|
||||||
> - Grafana _visualizes_ the historical data from Prometheus
|
|
||||||
> - Grafana cannot directly scrape Prometheus `/metrics` endpoints
|
|
||||||
|
|
||||||
## Prometheus Setup
|
|
||||||
|
|
||||||
### Configuring Prometheus
|
|
||||||
|
|
||||||
Create `/etc/prometheus/prometheus.yml`:
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
global:
|
|
||||||
scrape_interval: 15s
|
|
||||||
evaluation_interval: 15s
|
|
||||||
|
|
||||||
scrape_configs:
|
|
||||||
- job_name: "watchdog"
|
|
||||||
static_configs:
|
|
||||||
- targets: ["localhost:8080"]
|
|
||||||
|
|
||||||
# Optional: scrape multiple Watchdog instances
|
|
||||||
# static_configs:
|
|
||||||
# - targets:
|
|
||||||
# - 'watchdog-1.example.com:8080'
|
|
||||||
# - 'watchdog-2.example.com:8080'
|
|
||||||
# labels:
|
|
||||||
# instance: 'production'
|
|
||||||
|
|
||||||
# Scrape Prometheus itself
|
|
||||||
- job_name: "prometheus"
|
|
||||||
static_configs:
|
|
||||||
- targets: ["localhost:9090"]
|
|
||||||
```
|
|
||||||
|
|
||||||
### Verify Prometheus' health state
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Check Prometheus is running
|
|
||||||
curl http://localhost:9090/-/healthy
|
|
||||||
|
|
||||||
# Check it's scraping Watchdog
|
|
||||||
curl http://localhost:9090/api/v1/targets
|
|
||||||
```
|
|
||||||
|
|
||||||
### NixOS
|
|
||||||
|
|
||||||
Add to your NixOS configuration:
|
|
||||||
|
|
||||||
```nix
|
|
||||||
{
|
|
||||||
services.prometheus = {
|
|
||||||
enable = true;
|
|
||||||
port = 9090;
|
|
||||||
|
|
||||||
# Retention period
|
|
||||||
retentionTime = "30d";
|
|
||||||
|
|
||||||
scrapeConfigs = [
|
|
||||||
{
|
|
||||||
job_name = "watchdog";
|
|
||||||
static_configs = [{
|
|
||||||
targets = [ "localhost:8080" ];
|
|
||||||
}];
|
|
||||||
}
|
|
||||||
];
|
|
||||||
};
|
|
||||||
|
|
||||||
# Open firewall if needed
|
|
||||||
# networking.firewall.allowedTCPPorts = [ 9090 ];
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
For multiple Watchdog instances:
|
|
||||||
|
|
||||||
```nix
|
|
||||||
{
|
|
||||||
services.prometheus.scrapeConfigs = [
|
|
||||||
{
|
|
||||||
job_name = "watchdog";
|
|
||||||
static_configs = [
|
|
||||||
{
|
|
||||||
labels.env = "production";
|
|
||||||
targets = [
|
|
||||||
"watchdog-1:8080"
|
|
||||||
"watchdog-2:8080"
|
|
||||||
"watchdog-3:8080"
|
|
||||||
];
|
|
||||||
}
|
|
||||||
];
|
|
||||||
}
|
|
||||||
];
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
## Grafana Setup
|
|
||||||
|
|
||||||
### NixOS
|
|
||||||
|
|
||||||
```nix
|
|
||||||
{
|
|
||||||
services.grafana = {
|
|
||||||
enable = true;
|
|
||||||
settings = {
|
|
||||||
server = {
|
|
||||||
http_addr = "127.0.0.1";
|
|
||||||
http_port = 3000;
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
provision = {
|
|
||||||
enable = true;
|
|
||||||
|
|
||||||
datasources.settings.datasources = [{
|
|
||||||
name = "Prometheus";
|
|
||||||
type = "prometheus";
|
|
||||||
url = "http://localhost:9090";
|
|
||||||
isDefault = true;
|
|
||||||
}];
|
|
||||||
};
|
|
||||||
};
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### Configure Data Source (Manual)
|
|
||||||
|
|
||||||
If you're not using NixOS for provisioning, then you'll need to do provisioning
|
|
||||||
_imperatively_ from your Grafana configuration. This can be done through the
|
|
||||||
admin panel by navigating to `Configuration`, and choosing "add data source"
|
|
||||||
under `Data Sources`. Select your Prometheus instance, and save it.
|
|
||||||
|
|
||||||
### Import Pre-built Dashboard
|
|
||||||
|
|
||||||
A sample Grafana dashboard is provided with support for multi-host and
|
|
||||||
multi-site configurations. Import it, configure the data source and it should
|
|
||||||
work out of the box.
|
|
||||||
|
|
||||||
If you're not using NixOS for provisioning, the dashboard _also_ needs to be
|
|
||||||
provisioned manually. Under `Dashboards`, select `Import` and provide the JSON
|
|
||||||
contents or upload the sample dashboard from `contrib/grafana/watchdog.json`.
|
|
||||||
Select your Prometheus data source and import it.
|
|
||||||
|
|
||||||
See [contrib/grafana/README.md](../contrib/grafana/README.md) for full
|
|
||||||
documentation.
|
|
||||||
|
|
||||||
## Example Queries
|
|
||||||
|
|
||||||
Once Prometheus is scraping Watchdog and Grafana is connected, you may write
|
|
||||||
your own widgets or create queries. Here are some example queries using
|
|
||||||
Prometheus query language, PromQL. These are provided as examples and might not
|
|
||||||
provide everything you need. Nevertheless, use them to improve your setup at
|
|
||||||
your disposal.
|
|
||||||
|
|
||||||
If you believe you have some valuable widgets that you'd like to contribute
|
|
||||||
back, feel free!
|
|
||||||
|
|
||||||
### Top 10 Pages by Traffic
|
|
||||||
|
|
||||||
```promql
|
|
||||||
topk(10, sum by (path) (rate(web_pageviews_total[5m])))
|
|
||||||
```
|
|
||||||
|
|
||||||
### Mobile vs Desktop Split
|
|
||||||
|
|
||||||
```promql
|
|
||||||
sum by (device) (rate(web_pageviews_total[1h]))
|
|
||||||
```
|
|
||||||
|
|
||||||
### Unique Visitors
|
|
||||||
|
|
||||||
```promql
|
|
||||||
web_daily_unique_visitors
|
|
||||||
```
|
|
||||||
|
|
||||||
### Top Referrers
|
|
||||||
|
|
||||||
```promql
|
|
||||||
topk(10, sum by (referrer) (rate(web_pageviews_total{referrer!="direct"}[1d])))
|
|
||||||
```
|
|
||||||
|
|
||||||
### Multi-Site: Traffic per Domain
|
|
||||||
|
|
||||||
```promql
|
|
||||||
sum by (domain) (rate(web_pageviews_total[1h]))
|
|
||||||
```
|
|
||||||
|
|
||||||
### Cardinality Health
|
|
||||||
|
|
||||||
```promql
|
|
||||||
# Should be near zero
|
|
||||||
rate(web_path_overflow_total[5m])
|
|
||||||
rate(web_referrer_overflow_total[5m])
|
|
||||||
rate(web_event_overflow_total[5m])
|
|
||||||
```
|
|
||||||
|
|
||||||
## Horizontal Scaling Considerations
|
|
||||||
|
|
||||||
When running multiple Watchdog instances:
|
|
||||||
|
|
||||||
1. **Each instance exposes its own metrics** - Prometheus scrapes all instances
|
|
||||||
2. **Prometheus aggregates automatically** - use `sum()` in queries to aggregate
|
|
||||||
across instances
|
|
||||||
3. **No shared state needed** - each Watchdog instance is independent
|
|
||||||
|
|
||||||
Watchdog is almost entirely stateless, so horizontal scaling should be trivial
|
|
||||||
as long as you have the necessary infrastructure and, well, the patience.
|
|
||||||
Example with 3 instances:
|
|
||||||
|
|
||||||
```promql
|
|
||||||
# Total pageviews across all instances
|
|
||||||
sum(rate(web_pageviews_total[5m]))
|
|
||||||
|
|
||||||
# Per-instance breakdown
|
|
||||||
sum by (instance) (rate(web_pageviews_total[5m]))
|
|
||||||
```
|
|
||||||
|
|
||||||
## Alternatives to Prometheus
|
|
||||||
|
|
||||||
### VictoriaMetrics
|
|
||||||
|
|
||||||
Drop-in Prometheus replacement with better performance and compression:
|
|
||||||
|
|
||||||
```nix
|
|
||||||
{
|
|
||||||
services.victoriametrics = {
|
|
||||||
enable = true;
|
|
||||||
listenAddress = ":8428";
|
|
||||||
retentionPeriod = "12month";
|
|
||||||
};
|
|
||||||
|
|
||||||
# Configure Prometheus to remote-write to VictoriaMetrics
|
|
||||||
services.prometheus = {
|
|
||||||
enable = true;
|
|
||||||
remoteWrite = [{
|
|
||||||
url = "http://localhost:8428/api/v1/write";
|
|
||||||
}];
|
|
||||||
};
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### Grafana Agent
|
|
||||||
|
|
||||||
Lightweight alternative that scrapes and forwards to Grafana Cloud or local
|
|
||||||
Prometheus:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Systemd setup for Grafana Agent
|
|
||||||
sudo systemctl enable --now grafana-agent
|
|
||||||
```
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
# /etc/grafana-agent.yaml
|
|
||||||
metrics:
|
|
||||||
wal_directory: /var/lib/grafana-agent
|
|
||||||
configs:
|
|
||||||
- name: watchdog
|
|
||||||
scrape_configs:
|
|
||||||
- job_name: watchdog
|
|
||||||
static_configs:
|
|
||||||
- targets: ["localhost:8080"]
|
|
||||||
remote_write:
|
|
||||||
- url: http://localhost:9090/api/v1/write
|
|
||||||
```
|
|
||||||
|
|
||||||
## Monitoring the Monitoring
|
|
||||||
|
|
||||||
Monitor Prometheus itself:
|
|
||||||
|
|
||||||
```promql
|
|
||||||
# Prometheus scrape success rate
|
|
||||||
up{job="watchdog"}
|
|
||||||
|
|
||||||
# Scrape duration
|
|
||||||
scrape_duration_seconds{job="watchdog"}
|
|
||||||
|
|
||||||
# Time since last scrape
|
|
||||||
time() - timestamp(up{job="watchdog"})
|
|
||||||
```
|
|
||||||
|
|
||||||
## Additional Recommendations
|
|
||||||
|
|
||||||
1. **Retention**: Set `--storage.tsdb.retention.time=30d` or longer based on
|
|
||||||
disk space
|
|
||||||
2. **Backups**: Back up `/var/lib/prometheus` periodically (or whatever your
|
|
||||||
state directory is)
|
|
||||||
3. **Alerting**: Configure Prometheus alerting rules for critical metrics
|
|
||||||
4. **High Availability**: Run multiple Prometheus instances with identical
|
|
||||||
configs
|
|
||||||
5. **Remote Storage**: For long-term storage, use Thanos, Cortex, or
|
|
||||||
VictoriaMetrics
|
|
||||||
|
|
@ -27,7 +27,6 @@
|
||||||
pkgs.alejandra
|
pkgs.alejandra
|
||||||
pkgs.fd
|
pkgs.fd
|
||||||
pkgs.prettier
|
pkgs.prettier
|
||||||
pkgs.deno
|
|
||||||
pkgs.go # provides gofmt
|
pkgs.go # provides gofmt
|
||||||
pkgs.golines
|
pkgs.golines
|
||||||
];
|
];
|
||||||
|
|
@ -39,9 +38,6 @@
|
||||||
# Format HTML & Javascript files with Prettier
|
# Format HTML & Javascript files with Prettier
|
||||||
fd "$@" -t f -e html -e js -x prettier -w '{}'
|
fd "$@" -t f -e html -e js -x prettier -w '{}'
|
||||||
|
|
||||||
# Format Markdown with Deno's Markdown formatter
|
|
||||||
fd "$@" -t f -e md -x deno fmt -q '{}'
|
|
||||||
|
|
||||||
# Format go files with both gofmt & golines
|
# Format go files with both gofmt & golines
|
||||||
fd "$@" -t f -e go -x golines -l -w --max-len=110 \
|
fd "$@" -t f -e go -x golines -l -w --max-len=110 \
|
||||||
--base-formatter=gofmt \
|
--base-formatter=gofmt \
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,6 @@ import (
|
||||||
|
|
||||||
"github.com/prometheus/client_golang/prometheus"
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
"notashelf.dev/watchdog/internal/config"
|
"notashelf.dev/watchdog/internal/config"
|
||||||
"notashelf.dev/watchdog/internal/limits"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
var prometheusLabelPattern = regexp.MustCompile(`^[a-zA-Z0-9_/:.-]*$`)
|
var prometheusLabelPattern = regexp.MustCompile(`^[a-zA-Z0-9_/:.-]*$`)
|
||||||
|
|
@ -122,11 +121,11 @@ func NewMetricsAggregator(
|
||||||
return m
|
return m
|
||||||
}
|
}
|
||||||
|
|
||||||
// Background goroutine to update the unique visitors gauge periodically
|
// Background goroutine to update the unique visitors gauge every 10 seconds
|
||||||
// instead of on every request. This should help with performance.
|
// instead of on every request. This should help with performance.
|
||||||
func (m *MetricsAggregator) updateUniquesGauge() {
|
func (m *MetricsAggregator) updateUniquesGauge() {
|
||||||
defer m.wg.Done()
|
defer m.wg.Done()
|
||||||
ticker := time.NewTicker(limits.UniquesUpdatePeriod)
|
ticker := time.NewTicker(10 * time.Second)
|
||||||
defer ticker.Stop()
|
defer ticker.Stop()
|
||||||
|
|
||||||
for {
|
for {
|
||||||
|
|
|
||||||
|
|
@ -53,7 +53,7 @@ func (u *UniquesEstimator) Estimate() uint64 {
|
||||||
return u.hll.Estimate()
|
return u.hll.Estimate()
|
||||||
}
|
}
|
||||||
|
|
||||||
// Generates a deterministic salt based on the current date
|
// Cenerates a deterministic salt based on the current date
|
||||||
// Same day = same salt, different day = different salt
|
// Same day = same salt, different day = different salt
|
||||||
func dailySalt(t time.Time) string {
|
func dailySalt(t time.Time) string {
|
||||||
// Use UTC to ensure consistent rotation regardless of timezone
|
// Use UTC to ensure consistent rotation regardless of timezone
|
||||||
|
|
|
||||||
|
|
@ -137,7 +137,7 @@ func (h *IngestionHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||||
|
|
||||||
// Device classification
|
// Device classification
|
||||||
if h.cfg.Site.Collect.Device {
|
if h.cfg.Site.Collect.Device {
|
||||||
device = h.classifyDevice(event.Width, userAgent)
|
device = h.classifyDevice(event.Width)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Referrer classification
|
// Referrer classification
|
||||||
|
|
@ -271,43 +271,19 @@ func (h *IngestionHandler) ipInCIDR(ip, cidr string) bool {
|
||||||
return network.Contains(testIP)
|
return network.Contains(testIP)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Classifies device using both screen width and User-Agent parsing
|
// Classifies screen width into device categories using configured breakpoints
|
||||||
// Uses UA hints for better detection, falls back to width breakpoints
|
// FIXME: we need a more robust mechanism for classifying devices. Breakpoints
|
||||||
func (h *IngestionHandler) classifyDevice(width int, userAgent string) string {
|
// are the only ones I can think of *right now* but I'm positive there are better
|
||||||
// First try User-Agent based detection for better accuracy
|
// mechanisms. We'll get to this later.
|
||||||
ua := strings.ToLower(userAgent)
|
func (h *IngestionHandler) classifyDevice(width int) string {
|
||||||
|
if width == 0 {
|
||||||
// Tablet detection via UA (must come before mobile: Android tablets lack "mobile" keyword)
|
return "unknown"
|
||||||
if strings.Contains(ua, "tablet") ||
|
|
||||||
strings.Contains(ua, "ipad") ||
|
|
||||||
(strings.Contains(ua, "android") && !strings.Contains(ua, "mobile")) {
|
|
||||||
return "tablet"
|
|
||||||
}
|
}
|
||||||
|
if width < h.cfg.Limits.DeviceBreakpoints.Mobile {
|
||||||
// Mobile detection via UA
|
|
||||||
if strings.Contains(ua, "mobile") ||
|
|
||||||
strings.Contains(ua, "iphone") ||
|
|
||||||
strings.Contains(ua, "ipod") ||
|
|
||||||
strings.Contains(ua, "windows phone") ||
|
|
||||||
strings.Contains(ua, "blackberry") {
|
|
||||||
return "mobile"
|
return "mobile"
|
||||||
}
|
}
|
||||||
|
if width < h.cfg.Limits.DeviceBreakpoints.Tablet {
|
||||||
// If UA doesn't provide clear signal, use width breakpoints
|
return "tablet"
|
||||||
if width > 0 {
|
|
||||||
if width < h.cfg.Limits.DeviceBreakpoints.Mobile {
|
|
||||||
return "mobile"
|
|
||||||
}
|
|
||||||
if width < h.cfg.Limits.DeviceBreakpoints.Tablet {
|
|
||||||
return "tablet"
|
|
||||||
}
|
|
||||||
return "desktop"
|
|
||||||
}
|
}
|
||||||
|
return "desktop"
|
||||||
// Default to desktop if UA suggests desktop browser
|
|
||||||
if userAgent != "" {
|
|
||||||
return "desktop"
|
|
||||||
}
|
|
||||||
|
|
||||||
return "unknown"
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ package api
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/http/httptest"
|
"net/http/httptest"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
@ -206,113 +207,51 @@ func TestIngestionHandler_InvalidJSON(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func newTestHandler(cfg *config.Config) *IngestionHandler {
|
func TestIngestionHandler_DeviceClassification(t *testing.T) {
|
||||||
|
cfg := config.Config{
|
||||||
|
Site: config.SiteConfig{
|
||||||
|
Domains: []string{"example.com"},
|
||||||
|
Collect: config.CollectConfig{
|
||||||
|
Pageviews: true,
|
||||||
|
Device: true,
|
||||||
|
},
|
||||||
|
Path: config.PathConfig{},
|
||||||
|
},
|
||||||
|
Limits: config.LimitsConfig{
|
||||||
|
MaxPaths: 100,
|
||||||
|
MaxSources: 50,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
pathNorm := normalize.NewPathNormalizer(cfg.Site.Path)
|
pathNorm := normalize.NewPathNormalizer(cfg.Site.Path)
|
||||||
pathRegistry := aggregate.NewPathRegistry(cfg.Limits.MaxPaths)
|
pathRegistry := aggregate.NewPathRegistry(cfg.Limits.MaxPaths)
|
||||||
refRegistry := normalize.NewReferrerRegistry(cfg.Limits.MaxSources)
|
refRegistry := normalize.NewReferrerRegistry(cfg.Limits.MaxSources)
|
||||||
metricsAgg := aggregate.NewMetricsAggregator(
|
metricsAgg := aggregate.NewMetricsAggregator(
|
||||||
pathRegistry,
|
pathRegistry,
|
||||||
aggregate.NewCustomEventRegistry(100),
|
aggregate.NewCustomEventRegistry(100),
|
||||||
cfg,
|
&cfg,
|
||||||
)
|
)
|
||||||
return NewIngestionHandler(cfg, pathNorm, pathRegistry, refRegistry, metricsAgg)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestClassifyDevice_UA(t *testing.T) {
|
handler := NewIngestionHandler(&cfg, pathNorm, pathRegistry, refRegistry, metricsAgg)
|
||||||
cfg := &config.Config{
|
|
||||||
Limits: config.LimitsConfig{
|
|
||||||
DeviceBreakpoints: config.DeviceBreaks{
|
|
||||||
Mobile: 768,
|
|
||||||
Tablet: 1024,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
h := newTestHandler(cfg)
|
|
||||||
|
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
width int
|
width int
|
||||||
userAgent string
|
|
||||||
want string
|
|
||||||
}{
|
}{
|
||||||
// UA takes priority
|
{"mobile", 375},
|
||||||
{
|
{"tablet", 768},
|
||||||
name: "iphone via UA",
|
{"desktop", 1920},
|
||||||
width: 390,
|
|
||||||
userAgent: "Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15",
|
|
||||||
want: "mobile",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "android phone via UA",
|
|
||||||
width: 0,
|
|
||||||
userAgent: "Mozilla/5.0 (Linux; Android 13; Pixel 7) Mobile Safari/537.36",
|
|
||||||
want: "mobile",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "windows phone via UA",
|
|
||||||
width: 0,
|
|
||||||
userAgent: "Mozilla/5.0 (compatible; MSIE 10.0; Windows Phone 8.0)",
|
|
||||||
want: "mobile",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "ipad via UA",
|
|
||||||
width: 1024,
|
|
||||||
userAgent: "Mozilla/5.0 (iPad; CPU OS 17_0 like Mac OS X) AppleWebKit/605.1.15",
|
|
||||||
want: "tablet",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "android tablet via UA (no mobile keyword)",
|
|
||||||
width: 0,
|
|
||||||
userAgent: "Mozilla/5.0 (Linux; Android 13; SM-T870) AppleWebKit/537.36",
|
|
||||||
want: "tablet",
|
|
||||||
},
|
|
||||||
// Falls back to width when UA is desktop
|
|
||||||
{
|
|
||||||
name: "desktop UA wide screen",
|
|
||||||
width: 1920,
|
|
||||||
userAgent: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 Chrome/120.0",
|
|
||||||
want: "desktop",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "desktop UA narrow width",
|
|
||||||
width: 500,
|
|
||||||
userAgent: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 Chrome/120.0",
|
|
||||||
want: "mobile",
|
|
||||||
},
|
|
||||||
// Width-only fallback
|
|
||||||
{
|
|
||||||
name: "no UA mobile width",
|
|
||||||
width: 375,
|
|
||||||
userAgent: "",
|
|
||||||
want: "mobile",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "no UA tablet width",
|
|
||||||
width: 800,
|
|
||||||
userAgent: "",
|
|
||||||
want: "tablet",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "no UA desktop width",
|
|
||||||
width: 1440,
|
|
||||||
userAgent: "",
|
|
||||||
want: "desktop",
|
|
||||||
},
|
|
||||||
// Unknown
|
|
||||||
{
|
|
||||||
name: "no UA no width",
|
|
||||||
width: 0,
|
|
||||||
userAgent: "",
|
|
||||||
want: "unknown",
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, tt := range tests {
|
for _, tt := range tests {
|
||||||
t.Run(tt.name, func(t *testing.T) {
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
got := h.classifyDevice(tt.width, tt.userAgent)
|
body := fmt.Sprintf(`{"d":"example.com","p":"/test","w":%d}`, tt.width)
|
||||||
if got != tt.want {
|
req := httptest.NewRequest("POST", "/api/event", bytes.NewBufferString(body))
|
||||||
t.Errorf("classifyDevice(%d, %q) = %q, want %q",
|
w := httptest.NewRecorder()
|
||||||
tt.width, tt.userAgent, got, tt.want)
|
handler.ServeHTTP(w, req)
|
||||||
|
|
||||||
|
if w.Code != http.StatusNoContent {
|
||||||
|
t.Errorf("expected status %d, got %d", http.StatusNoContent, w.Code)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,56 +0,0 @@
|
||||||
package health
|
|
||||||
|
|
||||||
import (
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/prometheus/client_golang/prometheus"
|
|
||||||
"github.com/prometheus/client_golang/prometheus/collectors"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Holds health and runtime metrics for the watchdog process
|
|
||||||
type Collector struct {
|
|
||||||
buildInfo prometheus.Gauge
|
|
||||||
startTime prometheus.Gauge
|
|
||||||
}
|
|
||||||
|
|
||||||
// Creates a health metrics collector with build metadata
|
|
||||||
func NewCollector(version, commit, buildDate string) *Collector {
|
|
||||||
buildInfo := prometheus.NewGauge(prometheus.GaugeOpts{
|
|
||||||
Name: "watchdog_build_info",
|
|
||||||
Help: "Build metadata for the running watchdog instance",
|
|
||||||
ConstLabels: prometheus.Labels{
|
|
||||||
"version": version,
|
|
||||||
"commit": commit,
|
|
||||||
"build_date": buildDate,
|
|
||||||
},
|
|
||||||
})
|
|
||||||
buildInfo.Set(1)
|
|
||||||
|
|
||||||
startTime := prometheus.NewGauge(prometheus.GaugeOpts{
|
|
||||||
Name: "watchdog_start_time_seconds",
|
|
||||||
Help: "Unix timestamp of when the watchdog process started",
|
|
||||||
})
|
|
||||||
startTime.Set(float64(time.Now().Unix()))
|
|
||||||
|
|
||||||
return &Collector{
|
|
||||||
buildInfo: buildInfo,
|
|
||||||
startTime: startTime,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Registers all health metrics plus Go runtime collectors
|
|
||||||
func (c *Collector) Register(reg prometheus.Registerer) error {
|
|
||||||
if err := reg.Register(c.buildInfo); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if err := reg.Register(c.startTime); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if err := reg.Register(collectors.NewGoCollector()); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if err := reg.Register(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{})); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
@ -1,103 +0,0 @@
|
||||||
package health
|
|
||||||
|
|
||||||
import (
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
"github.com/prometheus/client_golang/prometheus"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestNewCollector_RegistersMetrics(t *testing.T) {
|
|
||||||
reg := prometheus.NewRegistry()
|
|
||||||
c := NewCollector("v0.1.0", "abc1234", "2026-03-02")
|
|
||||||
|
|
||||||
if err := c.Register(reg); err != nil {
|
|
||||||
t.Fatalf("Register failed: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
metrics, err := reg.Gather()
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("Gather failed: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Should have at least the build_info and start_time metrics
|
|
||||||
names := make(map[string]bool)
|
|
||||||
for _, m := range metrics {
|
|
||||||
names[m.GetName()] = true
|
|
||||||
}
|
|
||||||
|
|
||||||
if !names["watchdog_build_info"] {
|
|
||||||
t.Error("expected watchdog_build_info metric")
|
|
||||||
}
|
|
||||||
if !names["watchdog_start_time_seconds"] {
|
|
||||||
t.Error("expected watchdog_start_time_seconds metric")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestNewCollector_BuildInfoLabels(t *testing.T) {
|
|
||||||
reg := prometheus.NewRegistry()
|
|
||||||
c := NewCollector("v1.2.3", "deadbeef", "2026-03-02")
|
|
||||||
|
|
||||||
if err := c.Register(reg); err != nil {
|
|
||||||
t.Fatalf("Register failed: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
metrics, err := reg.Gather()
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("Gather failed: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, m := range metrics {
|
|
||||||
if m.GetName() != "watchdog_build_info" {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
labels := make(map[string]string)
|
|
||||||
for _, l := range m.GetMetric()[0].GetLabel() {
|
|
||||||
labels[l.GetName()] = l.GetValue()
|
|
||||||
}
|
|
||||||
|
|
||||||
if labels["version"] != "v1.2.3" {
|
|
||||||
t.Errorf("expected version label %q, got %q", "v1.2.3", labels["version"])
|
|
||||||
}
|
|
||||||
if labels["commit"] != "deadbeef" {
|
|
||||||
t.Errorf("expected commit label %q, got %q", "deadbeef", labels["commit"])
|
|
||||||
}
|
|
||||||
if labels["build_date"] != "2026-03-02" {
|
|
||||||
t.Errorf(
|
|
||||||
"expected build_date label %q, got %q",
|
|
||||||
"2026-03-02",
|
|
||||||
labels["build_date"],
|
|
||||||
)
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
t.Error("watchdog_build_info metric not found in gathered metrics")
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestNewCollector_StartTimeIsPositive(t *testing.T) {
|
|
||||||
reg := prometheus.NewRegistry()
|
|
||||||
c := NewCollector("v0.1.0", "abc1234", "2026-03-02")
|
|
||||||
|
|
||||||
if err := c.Register(reg); err != nil {
|
|
||||||
t.Fatalf("Register failed: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
metrics, err := reg.Gather()
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("Gather failed: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, m := range metrics {
|
|
||||||
if m.GetName() != "watchdog_start_time_seconds" {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
val := m.GetMetric()[0].GetGauge().GetValue()
|
|
||||||
if val <= 0 {
|
|
||||||
t.Errorf("expected positive start time, got %v", val)
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
t.Error("watchdog_start_time_seconds metric not found")
|
|
||||||
}
|
|
||||||
|
|
@ -1,7 +1,5 @@
|
||||||
package limits
|
package limits
|
||||||
|
|
||||||
import "time"
|
|
||||||
|
|
||||||
// Size limits for request processing
|
// Size limits for request processing
|
||||||
const (
|
const (
|
||||||
MaxEventSize = 4 * 1024 // 4KB max event payload
|
MaxEventSize = 4 * 1024 // 4KB max event payload
|
||||||
|
|
@ -9,12 +7,3 @@ const (
|
||||||
MaxRefLen = 2048 // max referrer length
|
MaxRefLen = 2048 // max referrer length
|
||||||
MaxWidth = 10000 // max reasonable screen width
|
MaxWidth = 10000 // max reasonable screen width
|
||||||
)
|
)
|
||||||
|
|
||||||
// Timeout constants
|
|
||||||
const (
|
|
||||||
HTTPReadTimeout = 10 * time.Second // HTTP server read timeout
|
|
||||||
HTTPWriteTimeout = 10 * time.Second // HTTP server write timeout
|
|
||||||
HTTPIdleTimeout = 60 * time.Second // HTTP server idle timeout
|
|
||||||
ShutdownTimeout = 30 * time.Second // graceful shutdown timeout
|
|
||||||
UniquesUpdatePeriod = 10 * time.Second // HLL gauge update interval
|
|
||||||
)
|
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,6 @@ import (
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"notashelf.dev/watchdog/internal/config"
|
"notashelf.dev/watchdog/internal/config"
|
||||||
"notashelf.dev/watchdog/internal/limits"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type PathNormalizer struct {
|
type PathNormalizer struct {
|
||||||
|
|
@ -15,7 +14,7 @@ type PathNormalizer struct {
|
||||||
func NewPathNormalizer(cfg config.PathConfig) *PathNormalizer {
|
func NewPathNormalizer(cfg config.PathConfig) *PathNormalizer {
|
||||||
return &PathNormalizer{
|
return &PathNormalizer{
|
||||||
cfg: cfg,
|
cfg: cfg,
|
||||||
maxLength: limits.MaxPathLen,
|
maxLength: 2048,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
15
main.go
15
main.go
|
|
@ -2,19 +2,6 @@ package main
|
||||||
|
|
||||||
import "notashelf.dev/watchdog/cmd/watchdog"
|
import "notashelf.dev/watchdog/cmd/watchdog"
|
||||||
|
|
||||||
// Injected at build time via ldflags:
|
|
||||||
//
|
|
||||||
// -X main.Version=v1.0.0
|
|
||||||
// -X main.Commit=abc1234
|
|
||||||
// -X main.BuildDate=2026-03-02
|
|
||||||
//
|
|
||||||
// I hate this pattern btw.
|
|
||||||
var (
|
|
||||||
Version = "dev"
|
|
||||||
Commit = "unknown"
|
|
||||||
BuildDate = "unknown"
|
|
||||||
)
|
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
watchdog.Main(Version, Commit, BuildDate)
|
watchdog.Main()
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -22,7 +22,7 @@ buildGoModule (finalAttrs: {
|
||||||
|
|
||||||
vendorHash = "sha256-jMqPVvMZDm406Gi2g4zNSRJMySLAN7/CR/2NgF+gqtA=";
|
vendorHash = "sha256-jMqPVvMZDm406Gi2g4zNSRJMySLAN7/CR/2NgF+gqtA=";
|
||||||
|
|
||||||
ldflags = ["-s" "-w" "-X main.Version=${finalAttrs.version}"];
|
ldflags = ["-s" "-w" "-X main.version=${finalAttrs.version}"];
|
||||||
|
|
||||||
# Copy web assets
|
# Copy web assets
|
||||||
postInstall = ''
|
postInstall = ''
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue