cache: add SQLite route persistence; initial TTL and LRU eviction implementation
Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: I0370d6c114d5490634905c1a831a31526a6a6964
This commit is contained in:
parent
9f264fbef1
commit
663f9995b2
8 changed files with 674 additions and 5 deletions
|
|
@ -1 +1,176 @@
|
|||
package prober
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"sort"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Status is the health classification of an upstream.
type Status int

// Health states, listed from healthiest to least healthy.
const (
	// StatusActive is the zero value of Status.
	StatusActive Status = iota
	// StatusDegraded corresponds to 3+ consecutive failures.
	StatusDegraded
	// StatusDown corresponds to 10+ consecutive failures.
	StatusDown
)

// String returns the upper-case label for s. Every value other than
// StatusActive and StatusDegraded is rendered as "DOWN".
func (s Status) String() string {
	if s == StatusActive {
		return "ACTIVE"
	}
	if s == StatusDegraded {
		return "DEGRADED"
	}
	return "DOWN"
}
|
||||
|
||||
// In-memory metrics for one upstream.
|
||||
type UpstreamHealth struct {
|
||||
URL string
|
||||
EMALatency float64
|
||||
LastProbe time.Time
|
||||
ConsecutiveFails uint32
|
||||
TotalQueries uint64
|
||||
Status Status
|
||||
}
|
||||
|
||||
// Tracks latency and health for a set of upstreams.
|
||||
type Prober struct {
|
||||
mu sync.RWMutex
|
||||
alpha float64
|
||||
table map[string]*UpstreamHealth
|
||||
client *http.Client
|
||||
}
|
||||
|
||||
// Creates a Prober with the given EMA alpha coefficient.
|
||||
func New(alpha float64) *Prober {
|
||||
return &Prober{
|
||||
alpha: alpha,
|
||||
table: make(map[string]*UpstreamHealth),
|
||||
client: &http.Client{
|
||||
Timeout: 10 * time.Second,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// Seeds the prober with upstream URLs (no measurements yet).
|
||||
func (p *Prober) InitUpstreams(urls []string) {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
for _, u := range urls {
|
||||
if _, ok := p.table[u]; !ok {
|
||||
p.table[u] = &UpstreamHealth{URL: u, Status: StatusActive}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Records a successful latency measurement and updates the EMA.
|
||||
func (p *Prober) RecordLatency(url string, ms float64) {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
h := p.getOrCreate(url)
|
||||
if h.TotalQueries == 0 {
|
||||
h.EMALatency = ms
|
||||
} else {
|
||||
h.EMALatency = p.alpha*ms + (1-p.alpha)*h.EMALatency
|
||||
}
|
||||
h.ConsecutiveFails = 0
|
||||
h.TotalQueries++
|
||||
h.Status = StatusActive
|
||||
h.LastProbe = time.Now()
|
||||
}
|
||||
|
||||
// Records a probe failure.
|
||||
func (p *Prober) RecordFailure(url string) {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
h := p.getOrCreate(url)
|
||||
h.ConsecutiveFails++
|
||||
switch {
|
||||
case h.ConsecutiveFails >= 10:
|
||||
h.Status = StatusDown
|
||||
case h.ConsecutiveFails >= 3:
|
||||
h.Status = StatusDegraded
|
||||
}
|
||||
}
|
||||
|
||||
// Returns a copy of the health entry for url, or nil if unknown.
|
||||
func (p *Prober) GetHealth(url string) *UpstreamHealth {
|
||||
p.mu.RLock()
|
||||
defer p.mu.RUnlock()
|
||||
h, ok := p.table[url]
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
cp := *h
|
||||
return &cp
|
||||
}
|
||||
|
||||
// Returns all known upstreams sorted by EMA latency (ascending). DOWN upstreams last.
|
||||
func (p *Prober) SortedByLatency() []*UpstreamHealth {
|
||||
p.mu.RLock()
|
||||
defer p.mu.RUnlock()
|
||||
result := make([]*UpstreamHealth, 0, len(p.table))
|
||||
for _, h := range p.table {
|
||||
cp := *h
|
||||
result = append(result, &cp)
|
||||
}
|
||||
sort.Slice(result, func(i, j int) bool {
|
||||
if result[i].Status == StatusDown && result[j].Status != StatusDown {
|
||||
return false
|
||||
}
|
||||
if result[j].Status == StatusDown && result[i].Status != StatusDown {
|
||||
return true
|
||||
}
|
||||
return result[i].EMALatency < result[j].EMALatency
|
||||
})
|
||||
return result
|
||||
}
|
||||
|
||||
// Performs a HEAD /nix-cache-info against url and updates health.
|
||||
func (p *Prober) ProbeUpstream(url string) {
|
||||
start := time.Now()
|
||||
resp, err := p.client.Head(url + "/nix-cache-info")
|
||||
elapsed := float64(time.Since(start).Nanoseconds()) / 1e6
|
||||
|
||||
if err != nil || resp.StatusCode != 200 {
|
||||
p.RecordFailure(url)
|
||||
return
|
||||
}
|
||||
resp.Body.Close()
|
||||
p.RecordLatency(url, elapsed)
|
||||
}
|
||||
|
||||
// Probes all known upstreams on interval until stop is closed.
|
||||
func (p *Prober) RunProbeLoop(interval time.Duration, stop <-chan struct{}) {
|
||||
ticker := time.NewTicker(interval)
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-stop:
|
||||
return
|
||||
case <-ticker.C:
|
||||
p.mu.RLock()
|
||||
urls := make([]string, 0, len(p.table))
|
||||
for u := range p.table {
|
||||
urls = append(urls, u)
|
||||
}
|
||||
p.mu.RUnlock()
|
||||
for _, u := range urls {
|
||||
go p.ProbeUpstream(u)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (p *Prober) getOrCreate(url string) *UpstreamHealth {
|
||||
h, ok := p.table[url]
|
||||
if !ok {
|
||||
h = &UpstreamHealth{URL: url, Status: StatusActive}
|
||||
p.table[url] = h
|
||||
}
|
||||
return h
|
||||
}
|
||||
|
|
|
|||
100
internal/prober/prober_test.go
Normal file
100
internal/prober/prober_test.go
Normal file
|
|
@ -0,0 +1,100 @@
|
|||
package prober_test
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
|
||||
"notashelf.dev/ncro/internal/prober"
|
||||
)
|
||||
|
||||
func TestEMACalculation(t *testing.T) {
|
||||
p := prober.New(0.3)
|
||||
p.RecordLatency("https://example.com", 100)
|
||||
p.RecordLatency("https://example.com", 50)
|
||||
|
||||
// EMA after 2 measurements: first=100, second = 0.3*50 + 0.7*100 = 85
|
||||
health := p.GetHealth("https://example.com")
|
||||
if health == nil {
|
||||
t.Fatal("expected health entry")
|
||||
}
|
||||
if health.EMALatency < 84 || health.EMALatency > 86 {
|
||||
t.Errorf("EMA = %.2f, want ~85", health.EMALatency)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStatusProgression(t *testing.T) {
|
||||
p := prober.New(0.3)
|
||||
p.RecordLatency("https://example.com", 10)
|
||||
|
||||
for range 3 {
|
||||
p.RecordFailure("https://example.com")
|
||||
}
|
||||
h := p.GetHealth("https://example.com")
|
||||
if h.Status != prober.StatusDegraded {
|
||||
t.Errorf("status = %v, want Degraded after 3 failures", h.Status)
|
||||
}
|
||||
|
||||
for range 7 {
|
||||
p.RecordFailure("https://example.com")
|
||||
}
|
||||
h = p.GetHealth("https://example.com")
|
||||
if h.Status != prober.StatusDown {
|
||||
t.Errorf("status = %v, want Down after 10 failures", h.Status)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRecoveryAfterSuccess(t *testing.T) {
|
||||
p := prober.New(0.3)
|
||||
for range 10 {
|
||||
p.RecordFailure("https://example.com")
|
||||
}
|
||||
p.RecordLatency("https://example.com", 20)
|
||||
h := p.GetHealth("https://example.com")
|
||||
if h.Status != prober.StatusActive {
|
||||
t.Errorf("status = %v, want Active after recovery", h.Status)
|
||||
}
|
||||
if h.ConsecutiveFails != 0 {
|
||||
t.Errorf("ConsecutiveFails = %d, want 0", h.ConsecutiveFails)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSortedByLatency(t *testing.T) {
|
||||
p := prober.New(0.3)
|
||||
p.RecordLatency("https://slow.example.com", 200)
|
||||
p.RecordLatency("https://fast.example.com", 10)
|
||||
p.RecordLatency("https://medium.example.com", 50)
|
||||
|
||||
sorted := p.SortedByLatency()
|
||||
if len(sorted) != 3 {
|
||||
t.Fatalf("expected 3, got %d", len(sorted))
|
||||
}
|
||||
if sorted[0].URL != "https://fast.example.com" {
|
||||
t.Errorf("first = %q, want fast", sorted[0].URL)
|
||||
}
|
||||
}
|
||||
|
||||
func TestProbeUpstream(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(200)
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
p := prober.New(0.3)
|
||||
p.ProbeUpstream(srv.URL)
|
||||
|
||||
h := p.GetHealth(srv.URL)
|
||||
if h == nil || h.Status != prober.StatusActive {
|
||||
t.Errorf("expected Active after successful probe, got %v", h)
|
||||
}
|
||||
}
|
||||
|
||||
func TestProbeUpstreamFailure(t *testing.T) {
|
||||
p := prober.New(0.3)
|
||||
p.ProbeUpstream("http://127.0.0.1:1") // nothing listening
|
||||
|
||||
h := p.GetHealth("http://127.0.0.1:1")
|
||||
if h == nil || h.ConsecutiveFails == 0 {
|
||||
t.Error("expected failure recorded")
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue