nix: add tests for channel tarballs and gc pinning
Signed-off-by: NotAShelf <raf@notashelf.dev> Change-Id: Ifb9d95d5206b7b1cf23fa3d5aaf9d0db6a6a6964
This commit is contained in:
parent
78ad3d2039
commit
9dde82d46f
6 changed files with 540 additions and 5 deletions
204
nix/tests/machine-health.nix
Normal file
204
nix/tests/machine-health.nix
Normal file
|
|
@ -0,0 +1,204 @@
|
|||
{
|
||||
pkgs,
|
||||
self,
|
||||
}:
|
||||
pkgs.testers.nixosTest {
|
||||
name = "fc-machine-health";
|
||||
|
||||
nodes.machine = {
|
||||
imports = [
|
||||
self.nixosModules.fc-ci
|
||||
../vm-common.nix
|
||||
];
|
||||
_module.args.self = self;
|
||||
};
|
||||
|
||||
testScript = ''
|
||||
import hashlib
|
||||
import json
|
||||
|
||||
machine.start()
|
||||
machine.wait_for_unit("postgresql.service")
|
||||
machine.wait_until_succeeds("sudo -u fc psql -U fc -d fc -c 'SELECT 1'", timeout=30)
|
||||
machine.wait_for_unit("fc-server.service")
|
||||
machine.wait_until_succeeds("curl -sf http://127.0.0.1:3000/health", timeout=30)
|
||||
|
||||
api_token = "fc_testkey123"
|
||||
api_hash = hashlib.sha256(api_token.encode()).hexdigest()
|
||||
machine.succeed(
|
||||
f"sudo -u fc psql -U fc -d fc -c \"INSERT INTO api_keys (name, key_hash, role) VALUES ('test', '{api_hash}', 'admin')\""
|
||||
)
|
||||
auth_header = f"-H 'Authorization: Bearer {api_token}'"
|
||||
|
||||
# Create a builder via API
|
||||
builder_json = machine.succeed(
|
||||
"curl -sf -X POST http://127.0.0.1:3000/api/v1/admin/builders "
|
||||
f"{auth_header} "
|
||||
"-H 'Content-Type: application/json' "
|
||||
"-d '{\"name\": \"test-builder\", \"ssh_uri\": \"ssh://builder@host\", \"systems\": [\"x86_64-linux\"]}'"
|
||||
)
|
||||
builder = json.loads(builder_json)
|
||||
builder_id = builder["id"]
|
||||
|
||||
with subtest("New builder starts with zero failures"):
|
||||
assert builder["consecutive_failures"] == 0, \
|
||||
f"Expected 0 failures, got {builder['consecutive_failures']}"
|
||||
assert builder["disabled_until"] is None, \
|
||||
f"Expected disabled_until=null, got {builder['disabled_until']}"
|
||||
assert builder["last_failure"] is None, \
|
||||
f"Expected last_failure=null, got {builder['last_failure']}"
|
||||
|
||||
with subtest("Recording failure increments consecutive_failures"):
|
||||
machine.succeed(
|
||||
"sudo -u fc psql -U fc -d fc -c \""
|
||||
"UPDATE remote_builders SET "
|
||||
"consecutive_failures = LEAST(consecutive_failures + 1, 4), "
|
||||
"last_failure = NOW(), "
|
||||
"disabled_until = NOW() + interval '60 seconds' "
|
||||
f"WHERE id = '{builder_id}'\""
|
||||
)
|
||||
result = machine.succeed(
|
||||
f"curl -sf http://127.0.0.1:3000/api/v1/admin/builders/{builder_id}"
|
||||
)
|
||||
b = json.loads(result)
|
||||
assert b["consecutive_failures"] == 1, \
|
||||
f"Expected 1 failure, got {b['consecutive_failures']}"
|
||||
assert b["disabled_until"] is not None, \
|
||||
"Expected disabled_until to be set"
|
||||
assert b["last_failure"] is not None, \
|
||||
"Expected last_failure to be set"
|
||||
|
||||
with subtest("Failures cap at 4"):
|
||||
machine.succeed(
|
||||
"sudo -u fc psql -U fc -d fc -c \""
|
||||
f"UPDATE remote_builders SET consecutive_failures = 10 WHERE id = '{builder_id}'\""
|
||||
)
|
||||
# Simulate record_failure SQL (same as repo code)
|
||||
machine.succeed(
|
||||
"sudo -u fc psql -U fc -d fc -c \""
|
||||
"UPDATE remote_builders SET "
|
||||
"consecutive_failures = LEAST(consecutive_failures + 1, 4), "
|
||||
"last_failure = NOW(), "
|
||||
"disabled_until = NOW() + make_interval(secs => 60.0 * power(3, LEAST(consecutive_failures + 1, 4) - 1)) "
|
||||
f"WHERE id = '{builder_id}'\""
|
||||
)
|
||||
result = machine.succeed(
|
||||
"sudo -u fc psql -U fc -d fc -tA -c "
|
||||
f"\"SELECT consecutive_failures FROM remote_builders WHERE id = '{builder_id}'\""
|
||||
)
|
||||
assert result.strip() == "4", f"Expected failures capped at 4, got {result.strip()}"
|
||||
|
||||
with subtest("Disabled builder excluded from find_for_system"):
|
||||
# Set disabled_until far in the future
|
||||
machine.succeed(
|
||||
"sudo -u fc psql -U fc -d fc -c \""
|
||||
"UPDATE remote_builders SET disabled_until = NOW() + interval '1 hour' "
|
||||
f"WHERE id = '{builder_id}'\""
|
||||
)
|
||||
result = machine.succeed(
|
||||
"sudo -u fc psql -U fc -d fc -tA -c "
|
||||
"\"SELECT count(*) FROM remote_builders "
|
||||
"WHERE enabled = true "
|
||||
"AND 'x86_64-linux' = ANY(systems) "
|
||||
"AND (disabled_until IS NULL OR disabled_until < NOW())\""
|
||||
)
|
||||
assert result.strip() == "0", \
|
||||
f"Expected disabled builder excluded, got count={result.strip()}"
|
||||
|
||||
with subtest("Non-disabled builder included in find_for_system"):
|
||||
# Clear disabled_until
|
||||
machine.succeed(
|
||||
"sudo -u fc psql -U fc -d fc -c \""
|
||||
f"UPDATE remote_builders SET disabled_until = NULL WHERE id = '{builder_id}'\""
|
||||
)
|
||||
result = machine.succeed(
|
||||
"sudo -u fc psql -U fc -d fc -tA -c "
|
||||
"\"SELECT count(*) FROM remote_builders "
|
||||
"WHERE enabled = true "
|
||||
"AND 'x86_64-linux' = ANY(systems) "
|
||||
"AND (disabled_until IS NULL OR disabled_until < NOW())\""
|
||||
)
|
||||
assert result.strip() == "1", \
|
||||
f"Expected non-disabled builder included, got count={result.strip()}"
|
||||
|
||||
with subtest("Recording success resets health state"):
|
||||
# First set some failures
|
||||
machine.succeed(
|
||||
"sudo -u fc psql -U fc -d fc -c \""
|
||||
"UPDATE remote_builders SET "
|
||||
"consecutive_failures = 3, "
|
||||
"disabled_until = NOW() + interval '1 hour', "
|
||||
"last_failure = NOW() "
|
||||
f"WHERE id = '{builder_id}'\""
|
||||
)
|
||||
# Simulate record_success (same as repo code)
|
||||
machine.succeed(
|
||||
"sudo -u fc psql -U fc -d fc -c \""
|
||||
"UPDATE remote_builders SET "
|
||||
"consecutive_failures = 0, "
|
||||
"disabled_until = NULL "
|
||||
f"WHERE id = '{builder_id}'\""
|
||||
)
|
||||
result = machine.succeed(
|
||||
f"curl -sf http://127.0.0.1:3000/api/v1/admin/builders/{builder_id}"
|
||||
)
|
||||
b = json.loads(result)
|
||||
assert b["consecutive_failures"] == 0, \
|
||||
f"Expected 0 failures after success, got {b['consecutive_failures']}"
|
||||
assert b["disabled_until"] is None, \
|
||||
f"Expected disabled_until=null after success, got {b['disabled_until']}"
|
||||
|
||||
with subtest("Health fields visible in admin API list"):
|
||||
result = machine.succeed(
|
||||
f"curl -sf http://127.0.0.1:3000/api/v1/admin/builders {auth_header}"
|
||||
)
|
||||
builders = json.loads(result)
|
||||
assert len(builders) >= 1, "Expected at least one builder"
|
||||
b = builders[0]
|
||||
assert "consecutive_failures" in b, "Missing consecutive_failures in API response"
|
||||
assert "disabled_until" in b, "Missing disabled_until in API response"
|
||||
assert "last_failure" in b, "Missing last_failure in API response"
|
||||
|
||||
with subtest("Exponential backoff increases with failures"):
|
||||
# Record 1st failure: expect ~60s backoff
|
||||
machine.succeed(
|
||||
"sudo -u fc psql -U fc -d fc -c \""
|
||||
f"UPDATE remote_builders SET consecutive_failures = 0, disabled_until = NULL WHERE id = '{builder_id}'\""
|
||||
)
|
||||
machine.succeed(
|
||||
"sudo -u fc psql -U fc -d fc -c \""
|
||||
"UPDATE remote_builders SET "
|
||||
"consecutive_failures = LEAST(consecutive_failures + 1, 4), "
|
||||
"last_failure = NOW(), "
|
||||
"disabled_until = NOW() + make_interval(secs => 60.0 * power(3, LEAST(consecutive_failures + 1, 4) - 1)) "
|
||||
f"WHERE id = '{builder_id}'\""
|
||||
)
|
||||
delta1 = machine.succeed(
|
||||
"sudo -u fc psql -U fc -d fc -tA -c "
|
||||
f"\"SELECT EXTRACT(EPOCH FROM (disabled_until - last_failure))::int FROM remote_builders WHERE id = '{builder_id}'\""
|
||||
)
|
||||
d1 = int(delta1.strip())
|
||||
assert 55 <= d1 <= 65, f"1st failure backoff expected ~60s, got {d1}s"
|
||||
|
||||
# Record 2nd failure: expect ~180s backoff
|
||||
machine.succeed(
|
||||
"sudo -u fc psql -U fc -d fc -c \""
|
||||
"UPDATE remote_builders SET "
|
||||
"consecutive_failures = LEAST(consecutive_failures + 1, 4), "
|
||||
"last_failure = NOW(), "
|
||||
"disabled_until = NOW() + make_interval(secs => 60.0 * power(3, LEAST(consecutive_failures + 1, 4) - 1)) "
|
||||
f"WHERE id = '{builder_id}'\""
|
||||
)
|
||||
delta2 = machine.succeed(
|
||||
"sudo -u fc psql -U fc -d fc -tA -c "
|
||||
f"\"SELECT EXTRACT(EPOCH FROM (disabled_until - last_failure))::int FROM remote_builders WHERE id = '{builder_id}'\""
|
||||
)
|
||||
d2 = int(delta2.strip())
|
||||
assert 175 <= d2 <= 185, f"2nd failure backoff expected ~180s, got {d2}s"
|
||||
|
||||
# Cleanup
|
||||
machine.succeed(
|
||||
f"curl -sf -X DELETE http://127.0.0.1:3000/api/v1/admin/builders/{builder_id} {auth_header}"
|
||||
)
|
||||
'';
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue