ncro/crates/router/src/lib.rs
NotAShelf 265a30d3c8
meta: prepare for publishing
Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: I02ee54baee048c58a480522ce79159eb6a6a6964
2026-05-11 13:28:35 +03:00

306 lines
8.1 KiB
Rust

use std::{
collections::HashMap,
sync::Arc,
time::{Duration, Instant},
};
use chrono::Utc;
use futures_util::{StreamExt, stream::FuturesUnordered};
use ncro_db::{Db, DbError, RouteEntry};
use ncro_health::{Prober, Status};
use ncro_narinfo::{NarInfo, NarInfoError, parse_public_key};
use thiserror::Error;
use tokio::sync::{Mutex, RwLock};
/// Errors produced while resolving a store hash to an upstream.
#[derive(Debug, Error)]
pub enum RouterError {
/// The store hash is negatively cached, or the race found no upstream
/// serving it.
#[error("not found in any upstream")]
NotFound,
/// The race found no upstream serving the path and the failures seen were
/// request errors rather than non-success HTTP responses.
#[error("all upstreams unavailable")]
UpstreamUnavailable,
/// A race was requested with an empty candidate list; carries the store
/// hash that was being resolved.
#[error("no candidates for {0:?}")]
NoCandidates(String),
/// A public key is registered for the chosen upstream and the fetched
/// narinfo did not verify against it.
#[error("narinfo signature verification failed")]
SignatureVerificationFailed,
/// An error propagated unchanged from the database layer.
#[error(transparent)]
Db(#[from] DbError),
}
/// Outcome of a successful [`Router::resolve`] call.
#[derive(Debug, Clone)]
pub struct ResolveResult {
/// URL of the upstream selected for the store hash.
pub url: String,
/// Latency in milliseconds: the stored route's latency EMA on a cache hit,
/// or the winning probe's measured latency after a race.
pub latency_ms: f64,
/// `true` when the result came from a stored route rather than a fresh race.
pub cache_hit: bool,
/// Raw narinfo body fetched from the winning upstream after a race. `None`
/// on cache hits and when the fetch after the race did not yield a body.
pub narinfo_bytes: Option<Vec<u8>>,
}
/// Resolves store hashes to an upstream by consulting stored routes and,
/// when needed, racing the candidate upstreams.
///
/// Cloning is cheap: every clone shares the same state through an [`Arc`].
#[derive(Clone)]
pub struct Router {
// Shared state behind the cloneable handle.
inner: Arc<RouterInner>,
}
/// State shared by all clones of a [`Router`].
struct RouterInner {
// Storage for routes and negative-cache entries.
db: Db,
// Source of per-upstream health information; also receives latency samples.
prober: Prober,
// Added to the current time to compute a newly stored route's expiry.
route_ttl: Duration,
// Used both as the HTTP client timeout and as the overall race deadline.
race_timeout: Duration,
// Passed to the database when recording a negative-cache entry.
negative_ttl: Duration,
// HTTP client used for the HEAD probes and the narinfo GET.
client: reqwest::Client,
// Upstream URL -> public key string used to verify that upstream's narinfo.
upstream_keys: RwLock<HashMap<String, String>>,
// Store hash -> mutex serializing concurrent resolves of that hash.
inflight: Mutex<HashMap<String, Arc<Mutex<()>>>>,
}
/// A successful probe of one upstream during a race.
#[derive(Debug)]
struct RaceResult {
// Upstream URL that answered the probe with a success status.
url: String,
// Time from just before the request was sent until the response arrived,
// in milliseconds.
latency_ms: f64,
}
impl Router {
/// Creates a router backed by `db` and `prober`.
///
/// `route_ttl` determines the expiry of newly stored routes, `race_timeout`
/// bounds both individual HTTP requests and the overall race, and
/// `negative_ttl` is the lifetime given to negative-cache entries.
#[must_use]
pub fn new(
  db: Db,
  prober: Prober,
  route_ttl: Duration,
  race_timeout: Duration,
  negative_ttl: Duration,
) -> Self {
  // If the configured builder fails, fall back to a default client (which
  // does not carry the timeout).
  let client = reqwest::Client::builder()
    .timeout(race_timeout)
    .build()
    .unwrap_or_else(|_| reqwest::Client::new());

  let state = RouterInner {
    db,
    prober,
    route_ttl,
    race_timeout,
    negative_ttl,
    client,
    upstream_keys: RwLock::new(HashMap::new()),
    inflight: Mutex::new(HashMap::new()),
  };

  Self {
    inner: Arc::new(state),
  }
}
/// Registers `public_key` as the key used to verify narinfo fetched from
/// the upstream `url`, replacing any key previously stored for that URL.
///
/// # Errors
///
/// Returns the error from `parse_public_key` when `public_key` does not
/// parse; nothing is stored in that case.
pub async fn set_upstream_key(
  &self,
  url: String,
  public_key: String,
) -> Result<(), NarInfoError> {
  // Validate before taking the write lock so a bad key never lands in the map.
  parse_public_key(&public_key)?;

  let mut keys = self.inner.upstream_keys.write().await;
  keys.insert(url, public_key);
  Ok(())
}
pub async fn resolve(
&self,
store_hash: &str,
candidates: &[String],
) -> Result<ResolveResult, RouterError> {
if self.inner.db.is_negative(store_hash).await? {
return Err(RouterError::NotFound);
}
if let Some(result) = self.valid_cached_route(store_hash).await? {
return Ok(result);
}
ncro_metrics::get().narinfo_cache_misses.inc();
let lock = {
let mut inflight = self.inner.inflight.lock().await;
Arc::clone(
inflight
.entry(store_hash.to_string())
.or_insert_with(|| Arc::new(Mutex::new(()))),
)
};
let _guard = lock.lock().await;
if let Some(result) = self.valid_cached_route(store_hash).await? {
self.inner.inflight.lock().await.remove(store_hash);
return Ok(result);
}
let result = self.race(store_hash, candidates).await;
if matches!(result, Err(RouterError::NotFound)) {
let _ = self
.inner
.db
.set_negative(store_hash, self.inner.negative_ttl)
.await;
}
self.inner.inflight.lock().await.remove(store_hash);
result
}
async fn valid_cached_route(
&self,
store_hash: &str,
) -> Result<Option<ResolveResult>, RouterError> {
let Some(entry) = self.inner.db.get_route(store_hash).await? else {
return Ok(None);
};
if !entry.is_valid() {
return Ok(None);
}
let health = self.inner.prober.get_health(&entry.upstream_url).await;
if !health.as_ref().is_none_or(|h| h.status == Status::Active) {
return Ok(None);
}
ncro_metrics::get().narinfo_cache_hits.inc();
Ok(Some(ResolveResult {
url: entry.upstream_url,
latency_ms: entry.latency_ema,
cache_hit: true,
narinfo_bytes: None,
}))
}
/// Probes every candidate upstream concurrently with a HEAD request for
/// `{upstream}/{store_hash}.narinfo`, picks the lowest-latency upstream
/// that answered successfully before the deadline, fetches its narinfo and
/// stores the resulting route.
///
/// # Errors
///
/// - [`RouterError::NoCandidates`] when `candidates` is empty.
/// - [`RouterError::NotFound`] when no probe succeeded and at least one
///   upstream answered with a non-success status.
/// - [`RouterError::UpstreamUnavailable`] when no probe succeeded and no
///   upstream answered at all (request errors and/or the race deadline).
/// - [`RouterError::SignatureVerificationFailed`] from the narinfo fetch.
/// - [`RouterError::Db`] when storing the route fails.
async fn race(
  &self,
  store_hash: &str,
  candidates: &[String],
) -> Result<ResolveResult, RouterError> {
  if candidates.is_empty() {
    return Err(RouterError::NoCandidates(store_hash.to_string()));
  }

  // One spawned probe per candidate. Each task yields `Ok(RaceResult)` on a
  // success status, `Err(false)` on any other status and `Err(true)` when
  // the request itself failed.
  let mut handles = FuturesUnordered::new();
  for upstream in candidates {
    let upstream = upstream.clone();
    let store_hash = store_hash.to_string();
    let client = self.inner.client.clone();
    handles.push(tokio::spawn(async move {
      let start = Instant::now();
      let res = client
        .head(format!("{upstream}/{store_hash}.narinfo"))
        .send()
        .await;
      match res {
        Ok(resp) if resp.status().is_success() => {
          Ok(RaceResult {
            url: upstream,
            latency_ms: start.elapsed().as_secs_f64() * 1000.0,
          })
        },
        Ok(_) => Err(false),
        Err(_) => Err(true),
      }
    }));
  }

  let mut net_errs = 0;
  let mut not_founds = 0;
  let mut timed_out = false;
  let mut winner: Option<RaceResult> = None;
  let deadline = tokio::time::sleep(self.inner.race_timeout);
  tokio::pin!(deadline);
  // Collect probe results until all have finished or the deadline fires,
  // keeping the successful probe with the lowest latency.
  while !handles.is_empty() {
    tokio::select! {
      () = &mut deadline => {
        timed_out = true;
        break;
      },
      joined = handles.next() => {
        match joined {
          Some(Ok(Ok(res))) => {
            if winner.as_ref().is_none_or(|w| res.latency_ms < w.latency_ms) {
              winner = Some(res);
            }
          },
          // A failed join (task panicked or was cancelled) counts as a
          // request-level failure.
          Some(Ok(Err(true)) | Err(_)) => net_errs += 1,
          Some(Ok(Err(false))) => not_founds += 1,
          None => break,
        }
      }
    }
  }

  let Some(winner) = winner else {
    // Only report "not found" when some upstream actually answered with a
    // non-success status. If every probe failed or was still pending at the
    // deadline, nothing denied the path, so the caller must not treat it as
    // missing (and must not negative-cache it).
    let upstream_failure = net_errs > 0 || timed_out;
    return if not_founds == 0 && upstream_failure {
      Err(RouterError::UpstreamUnavailable)
    } else {
      Err(RouterError::NotFound)
    };
  };

  ncro_metrics::get()
    .upstream_race_wins
    .with_label_values(&[&winner.url])
    .inc();
  // The latency metric is observed in seconds.
  ncro_metrics::get()
    .upstream_latency
    .with_label_values(&[&winner.url])
    .observe(winner.latency_ms / 1000.0);

  let (body, nar_url, nar_hash, nar_size) =
    self.fetch_narinfo(&winner.url, store_hash).await?;

  // Blend the new sample into the upstream's existing latency EMA
  // (0.3 * sample + 0.7 * previous); without a health record the sample
  // itself is used.
  let ema = self
    .inner
    .prober
    .get_health(&winner.url)
    .await
    .map_or(winner.latency_ms, |h| {
      0.3f64.mul_add(winner.latency_ms, 0.7 * h.ema_latency)
    });
  self
    .inner
    .prober
    .record_latency(&winner.url, winner.latency_ms)
    .await;

  let now = Utc::now();
  self
    .inner
    .db
    .set_route(&RouteEntry {
      store_path: store_hash.to_string(),
      upstream_url: winner.url.clone(),
      latency_ms: winner.latency_ms,
      latency_ema: ema,
      last_verified: now,
      query_count: 1,
      failure_count: 0,
      // A TTL that chrono cannot represent falls back to a zero duration.
      ttl: now
        + chrono::Duration::from_std(self.inner.route_ttl)
          .unwrap_or_default(),
      nar_hash,
      nar_size,
      nar_url,
    })
    .await?;

  Ok(ResolveResult {
    url: winner.url,
    latency_ms: winner.latency_ms,
    cache_hit: false,
    narinfo_bytes: body,
  })
}
/// Downloads `{upstream}/{store_hash}.narinfo` and extracts the fields
/// needed for a route entry.
///
/// Returns `(body, nar_url, nar_hash, nar_size)`. Fetch failures are not
/// errors: when the request fails, the status is not a success, or the body
/// cannot be read, the result is `(None, "", "", 0)`. When the body cannot
/// be parsed the raw body is still returned, with empty metadata.
///
/// # Errors
///
/// Returns [`RouterError::SignatureVerificationFailed`] when a public key is
/// registered for `upstream` and the parsed narinfo does not verify against
/// it (a verification error is treated the same as a failed verification).
async fn fetch_narinfo(
  &self,
  upstream: &str,
  store_hash: &str,
) -> Result<(Option<Vec<u8>>, String, String, u64), RouterError> {
  let narinfo_url = format!("{upstream}/{store_hash}.narinfo");
  let resp = match self.inner.client.get(narinfo_url).send().await {
    Ok(resp) if resp.status().is_success() => resp,
    _ => return Ok((None, String::new(), String::new(), 0)),
  };

  let body = match resp.bytes().await {
    Ok(bytes) => bytes.to_vec(),
    Err(_) => return Ok((None, String::new(), String::new(), 0)),
  };

  let parsed = match NarInfo::parse(body.as_slice()) {
    Ok(parsed) => parsed,
    Err(_) => return Ok((Some(body), String::new(), String::new(), 0)),
  };

  // Verification only applies to upstreams that have a key registered.
  let keys = self.inner.upstream_keys.read().await;
  if let Some(pubkey) = keys.get(upstream) {
    let verified = parsed.verify(pubkey).unwrap_or(false);
    if !verified {
      tracing::warn!(
        upstream,
        store = store_hash,
        "narinfo signature verification failed"
      );
      return Err(RouterError::SignatureVerificationFailed);
    }
  }
  drop(keys);

  Ok((Some(body), parsed.url, parsed.nar_hash, parsed.nar_size))
}
}