server: enrich /health with per-upstream status and latency

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: I7cb08fc9f55fbd4ee982c19d56798dce6a6a6964
This commit is contained in:
raf 2026-03-06 22:26:40 +03:00
commit 985ed1090a
Signed by: NotAShelf
GPG key ID: 29D95B64378DB4BF
2 changed files with 119 additions and 2 deletions

View file

@ -1,6 +1,7 @@
package server
import (
"encoding/json"
"errors"
"fmt"
"io"
@ -65,8 +66,45 @@ func (s *Server) handleCacheInfo(w http.ResponseWriter, _ *http.Request) {
}
// handleHealth writes a JSON health report containing an overall status and
// one entry per upstream known to the prober, in the order returned by
// SortedByLatency.
//
// The overall status is:
//   - "down" when there is at least one upstream and every upstream is down,
//   - "degraded" when any upstream is down or degraded,
//   - "ok" otherwise (including when there are no upstreams).
//
// Exactly one JSON document, followed by a newline, is written to the body.
// If the report cannot be encoded, the handler answers with 500 instead of a
// partial 200 response.
func (s *Server) handleHealth(w http.ResponseWriter, _ *http.Request) {
	type upstreamStatus struct {
		URL              string  `json:"url"`
		Status           string  `json:"status"`
		LatencyMs        float64 `json:"latency_ms"`
		ConsecutiveFails uint32  `json:"consecutive_fails"`
	}
	type response struct {
		Status    string           `json:"status"`
		Upstreams []upstreamStatus `json:"upstreams"`
	}

	sorted := s.prober.SortedByLatency()
	// make (rather than a nil slice) so that zero upstreams encode as [] and
	// not null.
	upstreams := make([]upstreamStatus, len(sorted))
	var downCount int
	var anyDegraded bool
	for i, h := range sorted {
		upstreams[i] = upstreamStatus{
			URL:              h.URL,
			Status:           strings.ToLower(h.Status.String()),
			LatencyMs:        h.EMALatency,
			ConsecutiveFails: h.ConsecutiveFails,
		}
		switch h.Status {
		case prober.StatusDown:
			downCount++
		case prober.StatusDegraded:
			anyDegraded = true
		}
	}

	overall := "ok"
	switch {
	case len(sorted) > 0 && downCount == len(sorted):
		overall = "down"
	case downCount > 0 || anyDegraded:
		overall = "degraded"
	}

	// Marshal before touching the response so that an encoding failure (for
	// example a non-finite latency value, which encoding/json rejects) is
	// reported as a 500 rather than a 200 with a truncated body.
	body, err := json.Marshal(response{Status: overall, Upstreams: upstreams})
	if err != nil {
		http.Error(w, "encoding health response", http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	// A write error here means the client went away; nothing useful can be
	// done about it at this point, so it is deliberately ignored.
	_, _ = w.Write(append(body, '\n'))
}
func (s *Server) handleNarinfo(w http.ResponseWriter, r *http.Request) {

View file

@ -1,6 +1,7 @@
package server_test
import (
"encoding/json"
"fmt"
"io"
"net/http"
@ -315,7 +316,7 @@ func TestNARRoutingUsesCache(t *testing.T) {
}
defer db.Close()
// Pre-seed the route cache: abc123 -> upstreamA, NarURL = "nar/abc123.nar.xz"
if err := db.SetRoute(&cache.RouteEntry{
StorePath: "abc123",
UpstreamURL: upstreamA.URL,
@ -383,3 +384,81 @@ func TestNARFallbackWhenFirstUpstreamMissing(t *testing.T) {
t.Errorf("NAR body = %q, want nar-bytes", body)
}
}
// TestHealthEndpointDegraded sets up two upstreams, records a latency sample
// for the first and five failures for the second, and then checks that
// GET /health answers 200 with an overall status of "degraded" and lists
// both upstreams.
func TestHealthEndpointDegraded(t *testing.T) {
	const (
		firstURL  = "https://up1.example.com"
		secondURL = "https://up2.example.com"
	)

	pr := prober.New(0.3)
	pr.InitUpstreams([]config.UpstreamConfig{
		{URL: firstURL},
		{URL: secondURL},
	})
	pr.RecordLatency(firstURL, 100)
	for i := 0; i < 5; i++ {
		pr.RecordFailure(secondURL)
	}

	store, err := cache.Open(":memory:", 100)
	if err != nil {
		t.Fatal(err)
	}
	defer store.Close()

	rt := router.New(store, pr, time.Hour, 5*time.Second, 10*time.Minute)
	srv := server.New(rt, pr, store, []config.UpstreamConfig{
		{URL: firstURL},
		{URL: secondURL},
	}, 30)

	rec := httptest.NewRecorder()
	srv.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/health", nil))
	if rec.Code != http.StatusOK {
		t.Fatalf("status = %d", rec.Code)
	}

	// Only the fields asserted on below are decoded.
	type upstreamEntry struct {
		URL    string `json:"url"`
		Status string `json:"status"`
	}
	var got struct {
		Status    string          `json:"status"`
		Upstreams []upstreamEntry `json:"upstreams"`
	}
	if err := json.NewDecoder(rec.Body).Decode(&got); err != nil {
		t.Fatalf("decode: %v", err)
	}

	if got.Status != "degraded" {
		t.Errorf("status = %q, want degraded", got.Status)
	}
	if n := len(got.Upstreams); n != 2 {
		t.Errorf("upstreams = %d, want 2", n)
	}
}
// TestHealthEndpointAllDown configures a single upstream, records ten
// failures for it, and checks that GET /health answers 200 with an overall
// status of "down".
func TestHealthEndpointAllDown(t *testing.T) {
	p := prober.New(0.3)
	p.InitUpstreams([]config.UpstreamConfig{{URL: "https://down.example.com"}})
	for range 10 {
		p.RecordFailure("https://down.example.com")
	}

	db, err := cache.Open(":memory:", 100)
	if err != nil {
		t.Fatal(err)
	}
	defer db.Close()

	r := router.New(db, p, time.Hour, 5*time.Second, 10*time.Minute)
	srv := server.New(r, p, db, []config.UpstreamConfig{{URL: "https://down.example.com"}}, 30)

	req := httptest.NewRequest(http.MethodGet, "/health", nil)
	w := httptest.NewRecorder()
	srv.ServeHTTP(w, req)

	// Check the status code as the degraded-case test does, so a routing or
	// handler failure is reported as such instead of as a decode error.
	if w.Code != http.StatusOK {
		t.Fatalf("status = %d", w.Code)
	}

	var resp struct {
		Status string `json:"status"`
	}
	if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
		t.Fatalf("decode: %v", err)
	}
	if resp.Status != "down" {
		t.Errorf("status = %q, want down", resp.Status)
	}
}