Compare commits

...

10 commits

Author SHA1 Message Date
df06ed38bf
docs: provide obserability stack guide
Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: Ibadc31d02413da836e85eaa3d446eb9e6a6a6964
2026-03-02 22:38:33 +03:00
13343ef2bd
nix: format Markdown with deno fmt
Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: Id652cb01903d1ca4de4b8839118fac556a6a6964
2026-03-02 22:38:32 +03:00
3363e5c923
docs: include process metrics under available exports
Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: I0df00ecfddf98db1ebc85c2fc7758e326a6a6964
2026-03-02 22:38:31 +03:00
531aafb094
docs: document configuration behaviour; notes on environment vars
Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: I071c766ba98ed03e0b10928c25af0d0b6a6a6964
2026-03-02 22:38:30 +03:00
dc6b6e0c0c
nix: correct ldflags
Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: I5806b91c9dc1dfa9690a6e01cd29059b6a6a6964
2026-03-02 22:38:29 +03:00
27b3641717
various: add internal health and runtime metrics
Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: Iae1dcf8495a00159d588c6e2344312f36a6a6964
2026-03-02 22:38:28 +03:00
d7cdf2cc49
chore: fix typo in dailySalt comment
Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: I8f0d0bf4bc597f0aecfd98c292f38cdb6a6a6964
2026-03-02 22:38:27 +03:00
6977a501b1
internal: better device classification via UA parsing
Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: I6c78f1eebe71ef4cf037ebbda2caaeb36a6a6964
2026-03-02 22:38:26 +03:00
896ec1a40a
watchdog: add metrics for blocked requests & logging
Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: Ib1d876859422a6115772962ed9e207a46a6a6964
2026-03-02 22:38:25 +03:00
7b06c4f2ca
various: extract magic numbers into named constants
Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: I854b2f9b5f39e4629c32e5681e6322826a6a6964
2026-03-02 22:38:24 +03:00
16 changed files with 911 additions and 62 deletions

View file

@ -75,7 +75,7 @@ installation mechanism.
$ go build -o watchdog .
# Run
$ ./watchdog -config config.yaml
$ ./watchdog --config config.yaml
```
## Configuration
@ -207,11 +207,20 @@ While not final, some of the metrics collected are as follows:
- `web_custom_events_total{event}` - Custom event counts
- `web_daily_unique_visitors` - Estimated unique visitors (HyperLogLog)
**Health metrics:**
**Cardinality metrics:**
- `web_path_overflow_total` - Paths rejected due to cardinality limit
- `web_referrer_overflow_total` - Referrers rejected due to limit
- `web_event_overflow_total` - Custom events rejected due to limit
- `web_blocked_requests_total{reason}` - File server requests blocked by
security filters
**Process metrics:**
- `watchdog_build_info{version,commit,build_date}` - Build metadata
- `watchdog_start_time_seconds` - Unix timestamp of process start
- `go_*` - Go runtime metrics (goroutines, GC, memory)
- `process_*` - OS process metrics (CPU, RSS, file descriptors)
## Privacy

View file

@ -11,8 +11,11 @@ import (
)
var (
cfgFile string
cfg *config.Config
cfgFile string
cfg *config.Config
version string
commit string
buildDate string
)
var rootCmd = &cobra.Command{
@ -84,7 +87,8 @@ func initConfig() {
}
}
func Main() {
func Main(v, c, bd string) {
version, commit, buildDate = v, c, bd
if err := rootCmd.Execute(); err != nil {
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
os.Exit(1)

View file

@ -11,13 +11,14 @@ import (
"path/filepath"
"strings"
"syscall"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"notashelf.dev/watchdog/internal/aggregate"
"notashelf.dev/watchdog/internal/api"
"notashelf.dev/watchdog/internal/config"
"notashelf.dev/watchdog/internal/health"
"notashelf.dev/watchdog/internal/limits"
"notashelf.dev/watchdog/internal/normalize"
)
@ -31,6 +32,15 @@ func Run(cfg *config.Config) error {
eventRegistry := aggregate.NewCustomEventRegistry(cfg.Limits.MaxCustomEvents)
metricsAgg := aggregate.NewMetricsAggregator(pathRegistry, eventRegistry, cfg)
// Metric for tracking blocked file requests (scrapers/bots)
blockedRequests := prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "web_blocked_requests_total",
Help: "File server requests blocked by security filters",
},
[]string{"reason"},
)
// Load HLL state from previous run if it exists
if cfg.Site.SaltRotation != "" {
log.Println("HLL state persistence enabled")
@ -44,6 +54,13 @@ func Run(cfg *config.Config) error {
// Register Prometheus metrics
promRegistry := prometheus.NewRegistry()
metricsAgg.MustRegister(promRegistry)
promRegistry.MustRegister(blockedRequests)
// Register health and runtime metrics
healthCollector := health.NewCollector(version, commit, buildDate)
if err := healthCollector.Register(promRegistry); err != nil {
return fmt.Errorf("failed to register health metrics: %w", err)
}
// Create HTTP handlers
ingestionHandler := api.NewIngestionHandler(
@ -84,16 +101,16 @@ func Run(cfg *config.Config) error {
// Serve whitelisted static files from /web/ if the directory exists
if info, err := os.Stat("web"); err == nil && info.IsDir() {
log.Println("Serving static files from /web/")
mux.Handle("/web/", safeFileServer("web"))
mux.Handle("/web/", safeFileServer("web", blockedRequests))
}
// Create HTTP server with timeouts
srv := &http.Server{
Addr: cfg.Server.ListenAddr,
Handler: mux,
ReadTimeout: 10 * time.Second,
WriteTimeout: 10 * time.Second,
IdleTimeout: 60 * time.Second,
ReadTimeout: limits.HTTPReadTimeout,
WriteTimeout: limits.HTTPWriteTimeout,
IdleTimeout: limits.HTTPIdleTimeout,
}
// Start server in goroutine
@ -115,8 +132,8 @@ func Run(cfg *config.Config) error {
case sig := <-shutdown:
log.Printf("Received signal: %v, starting graceful shutdown", sig)
// Give outstanding requests 30 seconds to complete
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
// Give outstanding requests time to complete
ctx, cancel := context.WithTimeout(context.Background(), limits.ShutdownTimeout)
defer cancel()
// Shutdown metrics aggregator.
@ -153,7 +170,7 @@ func basicAuth(next http.Handler, username, password string) http.Handler {
// Creates a file server that only serves whitelisted files. Blocks dotfiles, .git, .env, etc.
// TODO: I need to hook this up to eris somehow so I can just forward the paths that are being
// scanned despite not being on a whitelist. Would be a good way of detecting scrapers, maybe.
func safeFileServer(root string) http.Handler {
func safeFileServer(root string, blockedRequests *prometheus.CounterVec) http.Handler {
fs := http.FileServer(http.Dir(root))
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
// Clean the path
@ -161,6 +178,8 @@ func safeFileServer(root string) http.Handler {
// Block directory listings
if strings.HasSuffix(path, "/") {
blockedRequests.WithLabelValues("directory_listing").Inc()
log.Printf("Blocked directory listing attempt: %s from %s", path, r.RemoteAddr)
http.NotFound(w, r)
return
}
@ -168,6 +187,8 @@ func safeFileServer(root string) http.Handler {
// Block dotfiles and sensitive files
for segment := range strings.SplitSeq(path, "/") {
if strings.HasPrefix(segment, ".") {
blockedRequests.WithLabelValues("dotfile").Inc()
log.Printf("Blocked dotfile access: %s from %s", path, r.RemoteAddr)
http.NotFound(w, r)
return
}
@ -177,6 +198,8 @@ func safeFileServer(root string) http.Handler {
strings.Contains(lower, "config") ||
strings.HasSuffix(lower, ".bak") ||
strings.HasSuffix(lower, "~") {
blockedRequests.WithLabelValues("sensitive_file").Inc()
log.Printf("Blocked sensitive file access: %s from %s", path, r.RemoteAddr)
http.NotFound(w, r)
return
}
@ -185,6 +208,8 @@ func safeFileServer(root string) http.Handler {
// Only serve .js, .html, .css files
ext := strings.ToLower(filepath.Ext(path))
if ext != ".js" && ext != ".html" && ext != ".css" {
blockedRequests.WithLabelValues("invalid_extension").Inc()
log.Printf("Blocked invalid extension: %s from %s", path, r.RemoteAddr)
http.NotFound(w, r)
return
}

237
docs/configuration.md Normal file
View file

@ -0,0 +1,237 @@
# Configuration
Watchdog supports multiple configuration sources with the following precedence
(highest to lowest):
1. **Command-line flags**
2. **Environment variables**
3. **Configuration file**
4. **Defaults**
## Configuration File
The primary configuration method is via YAML file. By default, Watchdog looks
for:
- `./config.yaml` (current directory)
- `/etc/watchdog/config.yaml` (system-wide)
Specify a custom location:
```bash
# Provide your configuration YAML file with --config
$ watchdog --config /path/to/config.yaml
```
See [config.example.yaml](../config.example.yaml) for all available options.
## Environment Variables
All configuration options can be set via environment variables with the
`WATCHDOG_` prefix.
Nested fields use underscore separators. For example:
```bash
# site.domains
$ export WATCHDOG_SITE_DOMAINS="example.com,blog.example.com"
# server.listen_addr
$ export WATCHDOG_SERVER_LISTEN_ADDR="127.0.0.1:8080"
# site.collect.pageviews
$ export WATCHDOG_SITE_COLLECT_PAGEVIEWS=true
# limits.max_paths
$ export WATCHDOG_LIMITS_MAX_PATHS=10000
```
### Common Environment Variables
```bash
# Server
WATCHDOG_SERVER_LISTEN_ADDR="127.0.0.1:8080"
WATCHDOG_SERVER_METRICS_PATH="/metrics"
WATCHDOG_SERVER_INGESTION_PATH="/api/event"
WATCHDOG_SERVER_STATE_PATH="/var/lib/watchdog/hll.state"
# Site
WATCHDOG_SITE_DOMAINS="example.com" # comma-separated for multiple
WATCHDOG_SITE_SALT_ROTATION="daily"
WATCHDOG_SITE_SAMPLING=1.0
# Collection
WATCHDOG_SITE_COLLECT_PAGEVIEWS=true
WATCHDOG_SITE_COLLECT_COUNTRY=true
WATCHDOG_SITE_COLLECT_DEVICE=true
WATCHDOG_SITE_COLLECT_REFERRER="domain"
WATCHDOG_SITE_COLLECT_DOMAIN=false
# Limits
WATCHDOG_LIMITS_MAX_PATHS=10000
WATCHDOG_LIMITS_MAX_SOURCES=500
WATCHDOG_LIMITS_MAX_CUSTOM_EVENTS=100
WATCHDOG_LIMITS_MAX_EVENTS_PER_MINUTE=10000
# Security
WATCHDOG_SECURITY_CORS_ENABLED=false
WATCHDOG_SECURITY_METRICS_AUTH_ENABLED=false
WATCHDOG_SECURITY_METRICS_AUTH_USERNAME="admin"
WATCHDOG_SECURITY_METRICS_AUTH_PASSWORD="changeme"
```
## Command-Line Flags
Command-line flags override both config file and environment variables:
```bash
# Override server address
watchdog --listen-addr :9090
# Override metrics path
watchdog --metrics-path /prometheus/metrics
# Override ingestion path
watchdog --ingestion-path /api/v1/event
# Combine multiple overrides
watchdog --config prod.yaml --listen-addr :9090 --metrics-path /metrics
```
Available flags:
- `--config string` - Path to config file
- `--listen-addr string` - Server listen address
- `--metrics-path string` - Metrics endpoint path
- `--ingestion-path string` - Ingestion endpoint path
## Configuration Precedence Example
Given:
**config.yaml:**
```yaml
server:
listen_addr: ":8080"
metrics_path: "/metrics"
```
**Environment:**
```bash
export WATCHDOG_SERVER_LISTEN_ADDR=":9090"
```
**Command:**
```bash
watchdog --metrics-path "/prometheus/metrics"
```
**Result:**
- `listen_addr`: `:9090` (from environment variable)
- `metrics_path`: `/prometheus/metrics` (from CLI flag)
## Systemd Integration
Environment variables work seamlessly with systemd:
```ini
[Service]
Environment="WATCHDOG_SERVER_LISTEN_ADDR=127.0.0.1:8080"
Environment="WATCHDOG_SITE_DOMAINS=example.com"
Environment="WATCHDOG_LIMITS_MAX_PATHS=10000"
ExecStart=/usr/local/bin/watchdog --config /etc/watchdog/config.yaml
```
Or use `EnvironmentFile`:
```ini
[Service]
EnvironmentFile=/etc/watchdog/env
ExecStart=/usr/local/bin/watchdog
```
**/etc/watchdog/env:**
```bash
WATCHDOG_SERVER_LISTEN_ADDR=127.0.0.1:8080
WATCHDOG_SITE_DOMAINS=example.com
WATCHDOG_LIMITS_MAX_PATHS=10000
```
## NixOS Integration
NixOS configuration automatically converts to the correct format:
```nix
{
services.watchdog = {
enable = true;
settings = {
site.domains = [ "example.com" ];
server.listen_addr = "127.0.0.1:8080";
limits.max_paths = 10000;
};
};
}
```
This is equivalent to setting environment variables or using a config file.
## Validation
Configuration is validated on startup. Invalid values will cause Watchdog to
exit with an error:
```bash
$ watchdog
Error: config validation failed: site.domains is required
```
Common validation errors:
- `site.domains is required` - No domains configured
- `limits.max_paths must be greater than 0` - Invalid cardinality limit
- `site.collect.referrer must be 'off', 'domain', or 'url'` - Invalid referrer
mode
- `site.sampling must be between 0.0 and 1.0` - Invalid sampling rate
## Best Practices
1. **Use config file for base configuration** - Easier to version control and
review
2. **Use environment variables for secrets** - Don't commit passwords to config
files
3. **Use CLI flags for testing/overrides** - Quick temporary changes without
editing files
Example hybrid approach:
**config.yaml:**
```yaml
site:
domains:
- example.com
collect:
pageviews: true
device: true
limits:
max_paths: 10000
```
**Environment (secrets):**
```bash
export WATCHDOG_SECURITY_METRICS_AUTH_PASSWORD="$SECRET_PASSWORD"
```
**CLI (testing):**
```bash
watchdog --listen-addr :9090 # Test on different port
```

300
docs/observability.md Normal file
View file

@ -0,0 +1,300 @@
# Observability Setup
Watchdog exposes Prometheus-formatted metrics at `/metrics`. You need a
time-series database to scrape and store these metrics, then visualize them in
Grafana.
> [!IMPORTANT]
>
> **Why you need Prometheus:**
>
> - Watchdog exposes _current state_ (counters, gauges)
> - Prometheus _scrapes periodically_ and _stores time-series data_
> - Grafana _visualizes_ the historical data from Prometheus
> - Grafana cannot directly scrape Prometheus `/metrics` endpoints
## Prometheus Setup
### Configuring Prometheus
Create `/etc/prometheus/prometheus.yml`:
```yaml
global:
scrape_interval: 15s
evaluation_interval: 15s
scrape_configs:
- job_name: "watchdog"
static_configs:
- targets: ["localhost:8080"]
# Optional: scrape multiple Watchdog instances
# static_configs:
# - targets:
# - 'watchdog-1.example.com:8080'
# - 'watchdog-2.example.com:8080'
# labels:
# instance: 'production'
# Scrape Prometheus itself
- job_name: "prometheus"
static_configs:
- targets: ["localhost:9090"]
```
### Verify Prometheus' health state
```bash
# Check Prometheus is running
curl http://localhost:9090/-/healthy
# Check it's scraping Watchdog
curl http://localhost:9090/api/v1/targets
```
### NixOS
Add to your NixOS configuration:
```nix
{
services.prometheus = {
enable = true;
port = 9090;
# Retention period
retentionTime = "30d";
scrapeConfigs = [
{
job_name = "watchdog";
static_configs = [{
targets = [ "localhost:8080" ];
}];
}
];
};
# Open firewall if needed
# networking.firewall.allowedTCPPorts = [ 9090 ];
}
```
For multiple Watchdog instances:
```nix
{
services.prometheus.scrapeConfigs = [
{
job_name = "watchdog";
static_configs = [
{
labels.env = "production";
targets = [
"watchdog-1:8080"
"watchdog-2:8080"
"watchdog-3:8080"
];
}
];
}
];
}
```
## Grafana Setup
### NixOS
```nix
{
services.grafana = {
enable = true;
settings = {
server = {
http_addr = "127.0.0.1";
http_port = 3000;
};
};
provision = {
enable = true;
datasources.settings.datasources = [{
name = "Prometheus";
type = "prometheus";
url = "http://localhost:9090";
isDefault = true;
}];
};
};
}
```
### Configure Data Source (Manual)
If you're not using NixOS for provisioning, then you'll need to do provisioning
_imperatively_ from your Grafana configuration. This can be done through the
admin panel by navigating to `Configuration`, and choosing "add data source"
under `Data Sources`. Select your Prometheus instance, and save it.
### Import Pre-built Dashboard
A sample Grafana dashboard is provided with support for multi-host and
multi-site configurations. Import it, configure the data source and it should
work out of the box.
If you're not using NixOS for provisioning, the dashboard _also_ needs to be
provisioned manually. Under `Dashboards`, select `Import` and provide the JSON
contents or upload the sample dashboard from `contrib/grafana/watchdog.json`.
Select your Prometheus data source and import it.
See [contrib/grafana/README.md](../contrib/grafana/README.md) for full
documentation.
## Example Queries
Once Prometheus is scraping Watchdog and Grafana is connected, you may write
your own widgets or create queries. Below are some example queries written in
PromQL, the Prometheus query language. They are starting points and might not
cover everything you need; adapt them to your own setup as you see fit.
If you believe you have some valuable widgets that you'd like to contribute
back, feel free!
### Top 10 Pages by Traffic
```promql
topk(10, sum by (path) (rate(web_pageviews_total[5m])))
```
### Mobile vs Desktop Split
```promql
sum by (device) (rate(web_pageviews_total[1h]))
```
### Unique Visitors
```promql
web_daily_unique_visitors
```
### Top Referrers
```promql
topk(10, sum by (referrer) (rate(web_pageviews_total{referrer!="direct"}[1d])))
```
### Multi-Site: Traffic per Domain
```promql
sum by (domain) (rate(web_pageviews_total[1h]))
```
### Cardinality Health
```promql
# Should be near zero
rate(web_path_overflow_total[5m])
rate(web_referrer_overflow_total[5m])
rate(web_event_overflow_total[5m])
```
## Horizontal Scaling Considerations
When running multiple Watchdog instances:
1. **Each instance exposes its own metrics** - Prometheus scrapes all instances
2. **Prometheus aggregates automatically** - use `sum()` in queries to aggregate
across instances
3. **No shared state needed** - each Watchdog instance is independent
Watchdog is almost entirely stateless, so horizontal scaling should be trivial
as long as you have the necessary infrastructure and, well, the patience.
Example with 3 instances:
```promql
# Total pageviews across all instances
sum(rate(web_pageviews_total[5m]))
# Per-instance breakdown
sum by (instance) (rate(web_pageviews_total[5m]))
```
## Alternatives to Prometheus
### VictoriaMetrics
Drop-in Prometheus replacement with better performance and compression:
```nix
{
services.victoriametrics = {
enable = true;
listenAddress = ":8428";
retentionPeriod = "12month";
};
# Configure Prometheus to remote-write to VictoriaMetrics
services.prometheus = {
enable = true;
remoteWrite = [{
url = "http://localhost:8428/api/v1/write";
}];
};
}
```
### Grafana Agent
Lightweight alternative that scrapes and forwards to Grafana Cloud or local
Prometheus:
```bash
# Systemd setup for Grafana Agent
sudo systemctl enable --now grafana-agent
```
```yaml
# /etc/grafana-agent.yaml
metrics:
wal_directory: /var/lib/grafana-agent
configs:
- name: watchdog
scrape_configs:
- job_name: watchdog
static_configs:
- targets: ["localhost:8080"]
remote_write:
- url: http://localhost:9090/api/v1/write
```
## Monitoring the Monitoring
Monitor how Prometheus is scraping Watchdog:
```promql
# Whether the most recent scrape succeeded (1 = success, 0 = failure)
up{job="watchdog"}
# Scrape duration
scrape_duration_seconds{job="watchdog"}
# Time since last scrape
time() - timestamp(up{job="watchdog"})
```
## Additional Recommendations
1. **Retention**: Set `--storage.tsdb.retention.time=30d` or longer based on
disk space
2. **Backups**: Back up `/var/lib/prometheus` periodically (or whatever your
state directory is)
3. **Alerting**: Configure Prometheus alerting rules for critical metrics
4. **High Availability**: Run multiple Prometheus instances with identical
configs
5. **Remote Storage**: For long-term storage, use Thanos, Cortex, or
VictoriaMetrics

View file

@ -27,6 +27,7 @@
pkgs.alejandra
pkgs.fd
pkgs.prettier
pkgs.deno
pkgs.go # provides gofmt
pkgs.golines
];
@ -38,6 +39,9 @@
# Format HTML & Javascript files with Prettier
fd "$@" -t f -e html -e js -x prettier -w '{}'
# Format Markdown with Deno's Markdown formatter
fd "$@" -t f -e md -x deno fmt -q '{}'
# Format go files with both gofmt & golines
fd "$@" -t f -e go -x golines -l -w --max-len=110 \
--base-formatter=gofmt \

View file

@ -9,6 +9,7 @@ import (
"github.com/prometheus/client_golang/prometheus"
"notashelf.dev/watchdog/internal/config"
"notashelf.dev/watchdog/internal/limits"
)
var prometheusLabelPattern = regexp.MustCompile(`^[a-zA-Z0-9_/:.-]*$`)
@ -121,11 +122,11 @@ func NewMetricsAggregator(
return m
}
// Background goroutine to update the unique visitors gauge every 10 seconds
// Background goroutine to update the unique visitors gauge periodically
// instead of on every request. This should help with performance.
func (m *MetricsAggregator) updateUniquesGauge() {
defer m.wg.Done()
ticker := time.NewTicker(10 * time.Second)
ticker := time.NewTicker(limits.UniquesUpdatePeriod)
defer ticker.Stop()
for {

View file

@ -53,7 +53,7 @@ func (u *UniquesEstimator) Estimate() uint64 {
return u.hll.Estimate()
}
// Cenerates a deterministic salt based on the current date
// Generates a deterministic salt based on the current date
// Same day = same salt, different day = different salt
func dailySalt(t time.Time) string {
// Use UTC to ensure consistent rotation regardless of timezone

View file

@ -137,7 +137,7 @@ func (h *IngestionHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
// Device classification
if h.cfg.Site.Collect.Device {
device = h.classifyDevice(event.Width)
device = h.classifyDevice(event.Width, userAgent)
}
// Referrer classification
@ -271,19 +271,43 @@ func (h *IngestionHandler) ipInCIDR(ip, cidr string) bool {
return network.Contains(testIP)
}
// Classifies screen width into device categories using configured breakpoints
// FIXME: we need a more robust mechanism for classifying devices. Breakpoints
// are the only ones I can think of *right now* but I'm positive there are better
// mechanisns. We'll get to this later.
func (h *IngestionHandler) classifyDevice(width int) string {
if width == 0 {
return "unknown"
}
if width < h.cfg.Limits.DeviceBreakpoints.Mobile {
return "mobile"
}
if width < h.cfg.Limits.DeviceBreakpoints.Tablet {
// Classifies device using both screen width and User-Agent parsing
// Uses UA hints for better detection, falls back to width breakpoints
func (h *IngestionHandler) classifyDevice(width int, userAgent string) string {
// First try User-Agent based detection for better accuracy
ua := strings.ToLower(userAgent)
// Tablet detection via UA (must come before mobile: Android tablets lack "mobile" keyword)
if strings.Contains(ua, "tablet") ||
strings.Contains(ua, "ipad") ||
(strings.Contains(ua, "android") && !strings.Contains(ua, "mobile")) {
return "tablet"
}
return "desktop"
// Mobile detection via UA
if strings.Contains(ua, "mobile") ||
strings.Contains(ua, "iphone") ||
strings.Contains(ua, "ipod") ||
strings.Contains(ua, "windows phone") ||
strings.Contains(ua, "blackberry") {
return "mobile"
}
// If UA doesn't provide clear signal, use width breakpoints
if width > 0 {
if width < h.cfg.Limits.DeviceBreakpoints.Mobile {
return "mobile"
}
if width < h.cfg.Limits.DeviceBreakpoints.Tablet {
return "tablet"
}
return "desktop"
}
// Default to desktop if UA suggests desktop browser
if userAgent != "" {
return "desktop"
}
return "unknown"
}

View file

@ -2,7 +2,6 @@ package api
import (
"bytes"
"fmt"
"net/http"
"net/http/httptest"
"testing"
@ -207,51 +206,113 @@ func TestIngestionHandler_InvalidJSON(t *testing.T) {
}
}
func TestIngestionHandler_DeviceClassification(t *testing.T) {
cfg := config.Config{
Site: config.SiteConfig{
Domains: []string{"example.com"},
Collect: config.CollectConfig{
Pageviews: true,
Device: true,
},
Path: config.PathConfig{},
},
Limits: config.LimitsConfig{
MaxPaths: 100,
MaxSources: 50,
},
}
func newTestHandler(cfg *config.Config) *IngestionHandler {
pathNorm := normalize.NewPathNormalizer(cfg.Site.Path)
pathRegistry := aggregate.NewPathRegistry(cfg.Limits.MaxPaths)
refRegistry := normalize.NewReferrerRegistry(cfg.Limits.MaxSources)
metricsAgg := aggregate.NewMetricsAggregator(
pathRegistry,
aggregate.NewCustomEventRegistry(100),
&cfg,
cfg,
)
return NewIngestionHandler(cfg, pathNorm, pathRegistry, refRegistry, metricsAgg)
}
handler := NewIngestionHandler(&cfg, pathNorm, pathRegistry, refRegistry, metricsAgg)
func TestClassifyDevice_UA(t *testing.T) {
cfg := &config.Config{
Limits: config.LimitsConfig{
DeviceBreakpoints: config.DeviceBreaks{
Mobile: 768,
Tablet: 1024,
},
},
}
h := newTestHandler(cfg)
tests := []struct {
name string
width int
name string
width int
userAgent string
want string
}{
{"mobile", 375},
{"tablet", 768},
{"desktop", 1920},
// UA takes priority
{
name: "iphone via UA",
width: 390,
userAgent: "Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15",
want: "mobile",
},
{
name: "android phone via UA",
width: 0,
userAgent: "Mozilla/5.0 (Linux; Android 13; Pixel 7) Mobile Safari/537.36",
want: "mobile",
},
{
name: "windows phone via UA",
width: 0,
userAgent: "Mozilla/5.0 (compatible; MSIE 10.0; Windows Phone 8.0)",
want: "mobile",
},
{
name: "ipad via UA",
width: 1024,
userAgent: "Mozilla/5.0 (iPad; CPU OS 17_0 like Mac OS X) AppleWebKit/605.1.15",
want: "tablet",
},
{
name: "android tablet via UA (no mobile keyword)",
width: 0,
userAgent: "Mozilla/5.0 (Linux; Android 13; SM-T870) AppleWebKit/537.36",
want: "tablet",
},
// Falls back to width when UA is desktop
{
name: "desktop UA wide screen",
width: 1920,
userAgent: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 Chrome/120.0",
want: "desktop",
},
{
name: "desktop UA narrow width",
width: 500,
userAgent: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 Chrome/120.0",
want: "mobile",
},
// Width-only fallback
{
name: "no UA mobile width",
width: 375,
userAgent: "",
want: "mobile",
},
{
name: "no UA tablet width",
width: 800,
userAgent: "",
want: "tablet",
},
{
name: "no UA desktop width",
width: 1440,
userAgent: "",
want: "desktop",
},
// Unknown
{
name: "no UA no width",
width: 0,
userAgent: "",
want: "unknown",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
body := fmt.Sprintf(`{"d":"example.com","p":"/test","w":%d}`, tt.width)
req := httptest.NewRequest("POST", "/api/event", bytes.NewBufferString(body))
w := httptest.NewRecorder()
handler.ServeHTTP(w, req)
if w.Code != http.StatusNoContent {
t.Errorf("expected status %d, got %d", http.StatusNoContent, w.Code)
got := h.classifyDevice(tt.width, tt.userAgent)
if got != tt.want {
t.Errorf("classifyDevice(%d, %q) = %q, want %q",
tt.width, tt.userAgent, got, tt.want)
}
})
}

View file

@ -0,0 +1,56 @@
package health
import (
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/collectors"
)
// Collector bundles the Prometheus gauges that describe the running watchdog
// process: buildInfo carries the build metadata and startTime carries the
// process start timestamp.
type Collector struct {
	buildInfo, startTime prometheus.Gauge
}
// NewCollector builds the health gauges for this process.
//
// version, commit and buildDate are attached as constant labels to the
// watchdog_build_info gauge, whose value is fixed at 1. The
// watchdog_start_time_seconds gauge is set once, here, to the current Unix
// time in whole seconds. Nothing is registered until Register is called.
func NewCollector(version, commit, buildDate string) *Collector {
	c := &Collector{
		buildInfo: prometheus.NewGauge(prometheus.GaugeOpts{
			Name: "watchdog_build_info",
			Help: "Build metadata for the running watchdog instance",
			ConstLabels: prometheus.Labels{
				"version":    version,
				"commit":     commit,
				"build_date": buildDate,
			},
		}),
		startTime: prometheus.NewGauge(prometheus.GaugeOpts{
			Name: "watchdog_start_time_seconds",
			Help: "Unix timestamp of when the watchdog process started",
		}),
	}

	// The build-info value itself carries no information; the labels do.
	c.buildInfo.Set(1)
	c.startTime.Set(float64(time.Now().Unix()))

	return c
}
// Register adds the build-info and start-time gauges to reg, followed by the
// Go runtime collector and the process collector from the Prometheus client
// library. Registration happens in that order and stops at the first failure,
// returning that error unchanged; nothing registered before the failure is
// unregistered.
func (c *Collector) Register(reg prometheus.Registerer) error {
	all := []prometheus.Collector{
		c.buildInfo,
		c.startTime,
		collectors.NewGoCollector(),
		collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}),
	}

	for _, collector := range all {
		if err := reg.Register(collector); err != nil {
			return err
		}
	}
	return nil
}

View file

@ -0,0 +1,103 @@
package health
import (
"testing"
"github.com/prometheus/client_golang/prometheus"
)
// TestNewCollector_RegistersMetrics checks that, after Register, gathering
// from the registry reports both the watchdog_build_info and the
// watchdog_start_time_seconds metric families.
func TestNewCollector_RegistersMetrics(t *testing.T) {
	registry := prometheus.NewRegistry()
	if err := NewCollector("v0.1.0", "abc1234", "2026-03-02").Register(registry); err != nil {
		t.Fatalf("Register failed: %v", err)
	}

	families, err := registry.Gather()
	if err != nil {
		t.Fatalf("Gather failed: %v", err)
	}

	// Index the gathered family names. Register also adds the Go runtime and
	// process collectors, so only the presence of the two watchdog metrics
	// is asserted.
	seen := make(map[string]struct{}, len(families))
	for _, family := range families {
		seen[family.GetName()] = struct{}{}
	}

	if _, ok := seen["watchdog_build_info"]; !ok {
		t.Error("expected watchdog_build_info metric")
	}
	if _, ok := seen["watchdog_start_time_seconds"]; !ok {
		t.Error("expected watchdog_start_time_seconds metric")
	}
}
// TestNewCollector_BuildInfoLabels checks that the version, commit and
// build_date passed to NewCollector come back as labels on the gathered
// watchdog_build_info metric.
func TestNewCollector_BuildInfoLabels(t *testing.T) {
	registry := prometheus.NewRegistry()
	collector := NewCollector("v1.2.3", "deadbeef", "2026-03-02")
	if err := collector.Register(registry); err != nil {
		t.Fatalf("Register failed: %v", err)
	}

	families, err := registry.Gather()
	if err != nil {
		t.Fatalf("Gather failed: %v", err)
	}

	// Locate the build-info family among everything the registry gathered.
	at := -1
	for i, family := range families {
		if family.GetName() == "watchdog_build_info" {
			at = i
			break
		}
	}
	if at < 0 {
		t.Error("watchdog_build_info metric not found in gathered metrics")
		return
	}

	// Flatten the label pairs of the first sample into a lookup table.
	labels := make(map[string]string)
	for _, pair := range families[at].GetMetric()[0].GetLabel() {
		labels[pair.GetName()] = pair.GetValue()
	}

	if got := labels["version"]; got != "v1.2.3" {
		t.Errorf("expected version label %q, got %q", "v1.2.3", got)
	}
	if got := labels["commit"]; got != "deadbeef" {
		t.Errorf("expected commit label %q, got %q", "deadbeef", got)
	}
	if got := labels["build_date"]; got != "2026-03-02" {
		t.Errorf(
			"expected build_date label %q, got %q",
			"2026-03-02",
			got,
		)
	}
}
// TestNewCollector_StartTimeIsPositive checks that the gathered
// watchdog_start_time_seconds gauge holds a value greater than zero.
func TestNewCollector_StartTimeIsPositive(t *testing.T) {
	registry := prometheus.NewRegistry()
	collector := NewCollector("v0.1.0", "abc1234", "2026-03-02")
	if err := collector.Register(registry); err != nil {
		t.Fatalf("Register failed: %v", err)
	}

	families, err := registry.Gather()
	if err != nil {
		t.Fatalf("Gather failed: %v", err)
	}

	// Locate the start-time family among everything the registry gathered.
	at := -1
	for i, family := range families {
		if family.GetName() == "watchdog_start_time_seconds" {
			at = i
			break
		}
	}
	if at < 0 {
		t.Error("watchdog_start_time_seconds metric not found")
		return
	}

	if started := families[at].GetMetric()[0].GetGauge().GetValue(); started <= 0 {
		t.Errorf("expected positive start time, got %v", started)
	}
}

View file

@ -1,5 +1,7 @@
package limits
import "time"
// Size limits for request processing
const (
MaxEventSize = 4 * 1024 // 4KB max event payload
@ -7,3 +9,12 @@ const (
MaxRefLen = 2048 // max referrer length
MaxWidth = 10000 // max reasonable screen width
)
// Durations for the HTTP server, graceful shutdown and periodic gauge updates.
const (
	// HTTPReadTimeout is the HTTP server read timeout.
	HTTPReadTimeout time.Duration = 10 * time.Second
	// HTTPWriteTimeout is the HTTP server write timeout.
	HTTPWriteTimeout time.Duration = 10 * time.Second
	// HTTPIdleTimeout is the HTTP server idle timeout.
	HTTPIdleTimeout time.Duration = 60 * time.Second
	// ShutdownTimeout is the graceful shutdown timeout.
	ShutdownTimeout time.Duration = 30 * time.Second
	// UniquesUpdatePeriod is the HLL gauge update interval.
	UniquesUpdatePeriod time.Duration = 10 * time.Second
)

View file

@ -4,6 +4,7 @@ import (
"strings"
"notashelf.dev/watchdog/internal/config"
"notashelf.dev/watchdog/internal/limits"
)
type PathNormalizer struct {
@ -14,7 +15,7 @@ type PathNormalizer struct {
func NewPathNormalizer(cfg config.PathConfig) *PathNormalizer {
return &PathNormalizer{
cfg: cfg,
maxLength: 2048,
maxLength: limits.MaxPathLen,
}
}

15
main.go
View file

@ -2,6 +2,19 @@ package main
import "notashelf.dev/watchdog/cmd/watchdog"
// Build metadata passed to watchdog.Main at startup. The values below are the
// defaults when nothing is injected; they can be overridden at build time via
// ldflags:
//
//	-X main.Version=v1.0.0
//	-X main.Commit=abc1234
//	-X main.BuildDate=2026-03-02
//
// The -X target must match the variable name exactly, including case
// (main.Version, not main.version).
//
// I hate this pattern btw.
var (
Version = "dev"
Commit = "unknown"
BuildDate = "unknown"
)
func main() {
watchdog.Main()
watchdog.Main(Version, Commit, BuildDate)
}

View file

@ -22,7 +22,7 @@ buildGoModule (finalAttrs: {
vendorHash = "sha256-jMqPVvMZDm406Gi2g4zNSRJMySLAN7/CR/2NgF+gqtA=";
ldflags = ["-s" "-w" "-X main.version=${finalAttrs.version}"];
ldflags = ["-s" "-w" "-X main.Version=${finalAttrs.version}"];
# Copy web assets
postInstall = ''