nix: set up Nix VM tests
Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: I686e8e6c35395d871a38d974f9bb3a4f6a6a6964
This commit is contained in:
parent
00bbf5608c
commit
90f9a3d104
2 changed files with 448 additions and 0 deletions
|
|
@ -25,5 +25,12 @@
|
|||
};
|
||||
|
||||
hydraJobs = self.packages;
|
||||
|
||||
checks = forEachSystem (system: {
|
||||
p2p-discovery = import ./nix/tests/p2p.nix {
|
||||
pkgs = pkgsForEach.${system};
|
||||
inherit self;
|
||||
};
|
||||
});
|
||||
};
|
||||
}
|
||||
|
|
|
|||
441
nix/tests/p2p.nix
Normal file
441
nix/tests/p2p.nix
Normal file
|
|
@ -0,0 +1,441 @@
|
|||
{
|
||||
pkgs,
|
||||
self,
|
||||
}: let
|
||||
# Minimal payload derivation used as the object under test. Because the
# store path is fixed at evaluation time, it can be spliced into the test
# script below as a literal path.
testStorePath = pkgs.runCommand "ncro-test-payload" {} ''
  mkdir -p "$out"
  echo "ncro p2p test payload" > "$out/data"
'';
|
||||
|
||||
# The "name" half of Nix's "name:base64pubkey" key format. Shared by every
# signing node so a single trusted-public-keys entry covers all of them.
cacheKeyName = "ncro-test-cache-1";
|
||||
|
||||
# Baseline ncro configuration shared by every node.
#
# ncro's config.Validate() rejects an empty upstreams list, so each node
# gets cache.nixos.org as a last-resort static upstream. That also lets the
# test prove that *dynamically discovered* upstreams win: the test payload
# is never published to cache.nixos.org, so any successful fetch must have
# gone through a discovered peer.
commonNcroSettings = {
  server.listen = ":8080";

  cache = {
    ttl = "5m";
    negative_ttl = "30s";
  };

  discovery = {
    enabled = true;
    service_name = "_nix-serve._tcp";
    domain = "local";
    # Kept short so the test converges quickly; stale entries are evicted
    # after discovery_time * 3 = 15 s.
    discovery_time = "5s";
    priority = 10;
  };

  upstreams = [
    {
      url = "https://cache.nixos.org";
      # Lowest priority: discovered peers are registered at priority 10
      # and are therefore always preferred over this fallback.
      priority = 100;
      public_key = "cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY=";
    }
  ];
};
|
||||
|
||||
# Avahi settings shared by all nodes. mDNS publishing is enabled for both
# addresses and user services so nix-serve records propagate; the firewall
# is disabled elsewhere (commonNodeBase) so multicast crosses the virtual
# network unimpeded.
commonAvahi = {
  enable = true;
  nssmdns4 = true;
  publish = {
    enable = true;
    addresses = true;
    userServices = true;
  };
};
|
||||
|
||||
# One-shot key generator run on every nix-serve node (node1, node3). Both
# use the same key *name*, so node2 can verify signatures from either host
# with a single trusted-public-keys entry. The key pair is only created on
# first boot; subsequent runs are no-ops.
keygenScript = pkgs.writeShellScript "gen-cache-key" ''
  set -euo pipefail
  mkdir -p /etc/nix
  if [ ! -f /etc/nix/cache-key.sec ]; then
    ${pkgs.nix}/bin/nix-store \
      --generate-binary-cache-key "${cacheKeyName}" \
      /etc/nix/cache-key.sec \
      /etc/nix/cache-key.pub
  fi

  # Make the public key world-readable so tests can read it.
  chmod 644 /etc/nix/cache-key.pub
'';
|
||||
|
||||
# Base module imported by every node; node-specific overrides are merged
# on top via the NixOS module system.
commonNodeBase = {
  virtualisation = {
    memorySize = 2048;
    diskSize = 8192;
  };

  networking = {
    useNetworkd = true;
    # Disabled so avahi multicast crosses the virtual network freely.
    firewall.enable = false;
  };

  environment.systemPackages = [pkgs.curl];

  services.avahi = commonAvahi;

  # NOTE(review): node1 and node3 also define services.ncro.settings with
  # an extended upstreams list. Whether that second definition merges with
  # or replaces this one depends on the ncro module's option types — verify
  # the result does not contain duplicate upstream entries.
  services.ncro = {
    enable = true;
    settings = commonNcroSettings;
  };

  # `nix store sign` / `nix copy` in the test script need the nix-command
  # experimental feature.
  nix.settings.experimental-features = ["nix-command"];
};
|
||||
in
|
||||
pkgs.testers.runNixOSTest {
|
||||
name = "ncro-p2p-discovery";
|
||||
|
||||
nodes = {
|
||||
# node1: serves the signed test payload via nix-serve and runs ncro.
node1 = {pkgs, ...}: {
  imports = [self.nixosModules.ncro commonNodeBase];

  systemd.services = {
    # Create the signing key at first boot, before nix-serve/ncro start.
    gen-cache-key = {
      description = "Generate Nix binary cache signing key";
      wantedBy = ["multi-user.target"];
      before = ["nix-serve.service" "ncro.service"];
      serviceConfig = {
        Type = "oneshot";
        RemainAfterExit = true;
        ExecStart = keygenScript;
      };
    };

    # Sign the payload once the key exists. Runs as root because
    # `nix store sign` writes trust information into the store.
    sign-test-payload = {
      description = "Sign test store path for binary cache";
      wantedBy = ["multi-user.target"];
      after = ["gen-cache-key.service" "nix-daemon.service"];
      requires = ["gen-cache-key.service" "nix-daemon.service"];
      serviceConfig = {
        Type = "oneshot";
        RemainAfterExit = true;
        User = "root";
        ExecStart = pkgs.writeShellScript "sign-payload" ''
          ${pkgs.nix}/bin/nix store sign \
            --key-file /etc/nix/cache-key.sec \
            "${testStorePath}"
        '';
      };
    };
  };

  services = {
    nix-serve = {
      enable = true;
      secretKeyFile = "/etc/nix/cache-key.sec";
      port = 5000;
    };

    ncro = {
      enable = true;
      # Add the local nix-serve as a guaranteed-reachable upstream so ncro
      # on this node can serve locally-present paths even when the VM has
      # no route to cache.nixos.org.
      settings =
        commonNcroSettings
        // {
          upstreams =
            commonNcroSettings.upstreams
            ++ [
              {
                url = "http://127.0.0.1:5000";
                priority = 1;
              }
            ];
        };
    };
  };

  # nix-serve does not register itself with avahi, so the mDNS service
  # record must be provided explicitly for ncro's discovery to find it.
  environment.etc."avahi/services/nix-serve.service".text = ''
    <?xml version="1.0" standalone='no'?>
    <!DOCTYPE service-group SYSTEM "avahi-service.dtd">
    <service-group>
      <name replace-wildcards="yes">nix-serve on %h</name>
      <service>
        <type>_nix-serve._tcp</type>
        <port>5000</port>
      </service>
    </service-group>
  '';

  # Pull the payload into the system closure so node1's store is
  # guaranteed to contain it at boot.
  system.extraDependencies = [testStorePath];
};
|
||||
|
||||
# node2: runs only ncro and fetches everything through discovered peers.
node2 = {lib, ...}: {
  imports = [self.nixosModules.ncro commonNodeBase];

  # Nix on this node substitutes exclusively through the local ncro proxy.
  # The cache key on node1/node3 is generated at VM boot, so its base64
  # value cannot be embedded here; the test relies on ncro's own narinfo
  # signature verification (upstream public_key) plus --no-require-sigs on
  # the client side, while trusted-public-keys carries only the static
  # cache.nixos.org key.
  nix.settings = {
    substituters = lib.mkForce ["http://localhost:8080"];
    trusted-public-keys = [
      "cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY="
    ];
    # Lets the test point nix at the proxy without a rebuild.
    trusted-substituters = ["http://localhost:8080"];
    experimental-features = ["nix-command"];
  };
};
|
||||
|
||||
# node3: nix-serve + ncro; a second source for the payload. Unlike node1 it
# does NOT have the payload pre-loaded — it fetches through its own ncro
# proxy after discovering node1 via mDNS.
node3 = {lib, ...}: {
  imports = [self.nixosModules.ncro commonNodeBase];

  # Create the signing key at first boot, before nix-serve/ncro start.
  systemd.services.gen-cache-key = {
    description = "Generate Nix binary cache signing key";
    wantedBy = ["multi-user.target"];
    before = ["nix-serve.service" "ncro.service"];
    serviceConfig = {
      Type = "oneshot";
      RemainAfterExit = true;
      ExecStart = keygenScript;
    };
  };

  services = {
    nix-serve = {
      enable = true;
      secretKeyFile = "/etc/nix/cache-key.sec";
      port = 5000;
    };

    ncro = {
      enable = true;
      # Local nix-serve as a guaranteed-reachable upstream.
      settings =
        commonNcroSettings
        // {
          upstreams =
            commonNcroSettings.upstreams
            ++ [
              {
                url = "http://127.0.0.1:5000";
                priority = 1;
              }
            ];
        };
    };
  };

  # Explicit mDNS record for nix-serve, mirroring node1.
  environment.etc."avahi/services/nix-serve.service".text = ''
    <?xml version="1.0" standalone='no'?>
    <!DOCTYPE service-group SYSTEM "avahi-service.dtd">
    <service-group>
      <name replace-wildcards="yes">nix-serve on %h</name>
      <service>
        <type>_nix-serve._tcp</type>
        <port>5000</port>
      </service>
    </service-group>
  '';

  nix.settings = {
    substituters = lib.mkForce ["http://localhost:8080"];
    trusted-public-keys = ["cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY="];
    trusted-substituters = ["http://localhost:8080"];
  };
};
|
||||
};
|
||||
|
||||
testScript = ''
  import time
  import json

  def ncro_health(node):
      """Return the parsed /health JSON from ncro on the given node."""
      out = node.succeed("curl -sf http://localhost:8080/health")
      return json.loads(out)

  def ncro_upstream_urls(node):
      """Return the list of upstream URLs reported by ncro /health."""
      h = ncro_health(node)
      return [u["url"] for u in h.get("upstreams", [])]

  def wait_for_upstreams(node, min_count, timeout=60):
      """
      Poll /health until at least min_count upstreams are listed or
      timeout expires. Raises AssertionError on timeout.
      """
      deadline = time.time() + timeout
      # Remember the last successful reading. The failure message must not
      # issue a fresh /health call: if the endpoint is down (a likely cause
      # of the timeout), that call would raise and mask the diagnosis.
      last_urls = None
      while time.time() < deadline:
          try:
              last_urls = ncro_upstream_urls(node)
              if len(last_urls) >= min_count:
                  return last_urls
          except Exception:
              pass
          time.sleep(2)
      raise AssertionError(
          f"timed out waiting for {min_count} upstreams on {node.name}; "
          f"last seen: {last_urls}"
      )

  with subtest("boot all nodes"):
      start_all()

      node1.wait_for_unit("gen-cache-key.service")
      node1.wait_for_unit("sign-test-payload.service")
      node1.wait_for_unit("avahi-daemon.service")
      node1.wait_for_unit("nix-serve.service")
      node1.wait_for_unit("ncro.service")
      node1.wait_for_open_port(5000)  # nix-serve default port
      node1.wait_for_open_port(8080)  # ncro

      node2.wait_for_unit("avahi-daemon.service")
      node2.wait_for_unit("ncro.service")
      node2.wait_for_open_port(8080)

      node3.wait_for_unit("gen-cache-key.service")
      node3.wait_for_unit("avahi-daemon.service")
      node3.wait_for_unit("nix-serve.service")
      node3.wait_for_unit("ncro.service")
      node3.wait_for_open_port(5000)
      node3.wait_for_open_port(8080)

  with subtest("verify HTTP endpoints are functional"):
      # /nix-cache-info must return a valid response with StoreDir.
      for node in (node1, node2, node3):
          out = node.succeed("curl -sf http://localhost:8080/nix-cache-info")
          assert "StoreDir" in out, \
              f"{node.name}: /nix-cache-info missing StoreDir: {out!r}"
          assert "/nix/store" in out, \
              f"{node.name}: /nix-cache-info has wrong StoreDir: {out!r}"

      # /health must return JSON with a 'status' field.
      for node in (node1, node2, node3):
          h = ncro_health(node)
          assert "status" in h, \
              f"{node.name}: /health missing 'status': {h!r}"

  with subtest("read the runtime-generated public key from node1"):
      # The key was generated at boot; verify it has the expected format.
      pub_key = node1.succeed("cat /etc/nix/cache-key.pub").strip()
      expected_prefix = "${cacheKeyName}:"
      assert pub_key.startswith(expected_prefix), \
          f"unexpected public key format: {pub_key!r}"

  with subtest("wait for mDNS discovery to converge"):
      # discovery_time=5s; avahi needs a few seconds to propagate mDNS
      # records across the virtual network before ncro can discover them.
      # We poll /health rather than sleeping a fixed amount.

      # node2 should discover node1 and node3 (both run nix-serve).
      # The static cache.nixos.org upstream plus 2 discovered = >=3 total.
      node2_upstreams = wait_for_upstreams(node2, min_count=3, timeout=90)
      print(f"node2 upstreams after discovery: {node2_upstreams}")

      # Verify the discovery log messages contain the expected text.
      node2.succeed(
          "journalctl -u ncro --no-pager | grep -q 'discovered nix-serve instance'"
      )

      # node1 should have discovered node3 (its own nix-serve is not a remote peer).
      node1_upstreams = wait_for_upstreams(node1, min_count=2, timeout=90)
      print(f"node1 upstreams after discovery: {node1_upstreams}")

  with subtest("verify narinfo is served by ncro"):
      test_store_path = "${testStorePath}"
      # "/nix/store/<hash>-<name>" -> element 3 is "<hash>-<name>".
      store_hash = test_store_path.split("/")[3].split("-")[0]

      # ncro on node2 must proxy the narinfo request to node1 (which has the
      # path in its local nix-serve). node1 is discovered via mDNS.
      node2.succeed(f"curl -sf http://localhost:8080/{store_hash}.narinfo")

  with subtest("fetch test payload through ncro on node2"):
      # Ensure the test path is not already present on node2.
      node2.fail(f"nix store ls {test_store_path} 2>/dev/null")

      node2.succeed(
          "nix copy "
          "--from http://localhost:8080 "
          "--no-require-sigs "
          f"{test_store_path} "
          "2>&1"
      )

      # The file must now exist on node2.
      node2.succeed(f"test -f {test_store_path}/data")
      node2.succeed(f"grep -q 'ncro p2p test payload' {test_store_path}/data")

  with subtest("fetch test payload through ncro on node3"):
      node3.fail(f"nix store ls {test_store_path} 2>/dev/null")

      node3.succeed(
          "nix copy "
          "--from http://localhost:8080 "
          "--no-require-sigs "
          f"{test_store_path} "
          "2>&1"
      )
      node3.succeed(f"test -f {test_store_path}/data")

  with subtest("stale peer removal after avahi stops advertising"):
      # Stop avahi on node1 so it sends mDNS goodbye packets and ncro on
      # node2/node3 stops receiving keep-alive announcements.
      # Stopping only nix-serve is insufficient because avahi continues to
      # advertise the service record even after the daemon is gone.
      node1.succeed("systemctl stop avahi-daemon.service")

      # Stale TTL = discovery_time * 3 = 5s * 3 = 15s. Add margin.
      time.sleep(25)

      # ncro must have logged the removal.
      node2.succeed(
          "journalctl -u ncro --no-pager | grep -q 'removing stale peer'"
      )

      # /health should now report fewer upstreams (node1's instance removed).
      node2_upstreams_after = ncro_upstream_urls(node2)
      print(f"node2 upstreams after node1 avahi stopped: {node2_upstreams_after}")

      # node1's nix-serve URL must no longer be listed.
      node1_ip = node1.succeed("hostname -I").strip().split()[0]
      assert not any(node1_ip in u for u in node2_upstreams_after), \
          f"node1 IP still present in node2 upstreams: {node2_upstreams_after}"

  with subtest("node2 can still fetch through node3 after node1 leaves"):
      # Remove the path from node2 so we force a fresh fetch (best-effort;
      # the path may be pinned by a GC root).
      node2.succeed(f"nix store delete {test_store_path} 2>&1 || true")

      node2.succeed(
          "nix copy "
          "--from http://localhost:8080 "
          "--no-require-sigs "
          f"{test_store_path} "
          "2>&1"
      )
      node2.succeed(f"test -f {test_store_path}/data")

  print("All ncro P2P discovery tests passed.")
'';
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue