From 9dde82d46f57bd8b3b8805f3dbe8a162f83e74a5 Mon Sep 17 00:00:00 2001
From: NotAShelf
Date: Wed, 18 Feb 2026 11:43:05 +0300
Subject: [PATCH] nix: add tests for channel tarballs and gc pinning

Signed-off-by: NotAShelf
Change-Id: Ifb9d95d5206b7b1cf23fa3d5aaf9d0db6a6a6964
---
 flake.nix                     |   5 +-
 nix/tests/api-crud.nix        |   9 +-
 nix/tests/channel-tarball.nix | 158 ++++++++++++++++++++++++++
 nix/tests/features.nix        |   4 +-
 nix/tests/gc-pinning.nix      | 165 +++++++++++++++++++++++++++
 nix/tests/machine-health.nix  | 204 ++++++++++++++++++++++++++++++++++
 6 files changed, 540 insertions(+), 5 deletions(-)
 create mode 100644 nix/tests/channel-tarball.nix
 create mode 100644 nix/tests/gc-pinning.nix
 create mode 100644 nix/tests/machine-health.nix

diff --git a/flake.nix b/flake.nix
index e4a80c6..80e6fd6 100644
--- a/flake.nix
+++ b/flake.nix
@@ -95,9 +95,12 @@
           webhooks = pkgs.callPackage ./nix/tests/webhooks.nix {inherit self;};
           e2e = pkgs.callPackage ./nix/tests/e2e.nix {inherit self;};
           declarative = pkgs.callPackage ./nix/tests/declarative.nix {inherit self;};
+          gc-pinning = pkgs.callPackage ./nix/tests/gc-pinning.nix {inherit self;};
+          machine-health = pkgs.callPackage ./nix/tests/machine-health.nix {inherit self;};
+          channel-tarball = pkgs.callPackage ./nix/tests/channel-tarball.nix {inherit self;};
         };
       in {
-        inherit (vmTests) service-startup basic-api auth-rbac api-crud features webhooks e2e declarative;
+        inherit (vmTests) service-startup basic-api auth-rbac api-crud features webhooks e2e declarative gc-pinning machine-health channel-tarball;
         full = pkgs.symlinkJoin {
           name = "vm-tests-full";
           paths = builtins.attrValues vmTests;
diff --git a/nix/tests/api-crud.nix b/nix/tests/api-crud.nix
index 704656f..1c8b973 100644
--- a/nix/tests/api-crud.nix
+++ b/nix/tests/api-crud.nix
@@ -254,6 +254,9 @@ pkgs.testers.nixosTest {
         )
         assert code.strip() == "403", f"Expected 403 for read-only restart, got {code.strip()}"
 
+    # Stop the queue runner so it cannot claim the build before we bump it
+    machine.systemctl("stop fc-queue-runner.service")
+
     # Create a pending build to test bump
     with subtest("Create pending build for bump test"):
         machine.succeed(
@@ -288,7 +291,9 @@ pkgs.testers.nixosTest {
         )
         assert "cancelled" in result.strip().lower(), f"Expected cancelled, got: {result.strip()}"
 
-    # Evaluation comparison ----
+    machine.systemctl("start fc-queue-runner.service")
+
+    # Evaluation comparison
     with subtest("Trigger second evaluation for comparison"):
         result = machine.succeed(
             "curl -sf -X POST http://127.0.0.1:3000/api/v1/evaluations/trigger "
@@ -322,7 +327,7 @@ pkgs.testers.nixosTest {
         assert len(data["new_jobs"]) >= 1, f"Expected at least 1 new job, got {data['new_jobs']}"
         assert any(j["job_name"] == "new-pkg" for j in data["new_jobs"]), "new-pkg should be in new_jobs"
 
-    # Channel CRUD lifecycle ----
+    # Channel CRUD lifecycle
     with subtest("Create channel via API"):
         result = machine.succeed(
             "curl -sf -X POST http://127.0.0.1:3000/api/v1/channels "
diff --git a/nix/tests/channel-tarball.nix b/nix/tests/channel-tarball.nix
new file mode 100644
index 0000000..a19bf6b
--- /dev/null
+++ b/nix/tests/channel-tarball.nix
@@ -0,0 +1,158 @@
+{
+  pkgs,
+  self,
+}:
+pkgs.testers.nixosTest {
+  name = "fc-channel-tarball";
+
+  nodes.machine = {
+    imports = [
+      self.nixosModules.fc-ci
+      ../vm-common.nix
+    ];
+    _module.args.self = self;
+  };
+
+  testScript = ''
+    import hashlib
+    import json
+
+    machine.start()
+    machine.wait_for_unit("postgresql.service")
+    machine.wait_until_succeeds("sudo -u fc psql -U fc -d fc -c 'SELECT 1'", timeout=30)
+    machine.wait_for_unit("fc-server.service")
+    machine.wait_until_succeeds("curl -sf http://127.0.0.1:3000/health", timeout=30)
+
+    api_token = "fc_testkey123"
+    api_hash = hashlib.sha256(api_token.encode()).hexdigest()
+    machine.succeed(
+        f"sudo -u fc psql -U fc -d fc -c \"INSERT INTO api_keys (name, key_hash, role) VALUES ('test', '{api_hash}', 'admin')\""
+    )
+    auth_header = f"-H 'Authorization: Bearer {api_token}'"
+
+    # Create the project that the jobset and channel below are attached to
+    project_id = machine.succeed(
+        "curl -sf -X POST http://127.0.0.1:3000/api/v1/projects "
+        f"{auth_header} "
+        "-H 'Content-Type: application/json' "
+        "-d '{\"name\": \"tarball-test\", \"repository_url\": \"https://github.com/test/tarball\"}' "
+        "| jq -r .id"
+    ).strip()
+
+    # Create a jobset under that project
+    jobset_id = machine.succeed(
+        f"curl -sf -X POST 'http://127.0.0.1:3000/api/v1/projects/{project_id}/jobsets' "
+        f"{auth_header} "
+        "-H 'Content-Type: application/json' "
+        "-d '{\"name\": \"packages\", \"nix_expression\": \"packages\"}' "
+        "| jq -r .id"
+    ).strip()
+
+    # Insert a completed evaluation directly via SQL and capture its id
+    eval_id = machine.succeed(
+        "sudo -u fc psql -U fc -d fc -tA -c "
+        "\"INSERT INTO evaluations (jobset_id, commit_hash, status) "
+        f"VALUES ('{jobset_id}', 'abc123', 'completed') RETURNING id\" | head -1"
+    ).strip()
+
+    # Insert two succeeded builds ('hello', 'world') with output paths
+    machine.succeed(
+        "sudo -u fc psql -U fc -d fc -c "
+        "\"INSERT INTO builds (evaluation_id, job_name, drv_path, status, system, build_output_path) "
+        f"VALUES ('{eval_id}', 'hello', '/nix/store/fake-hello.drv', 'succeeded', 'x86_64-linux', '/nix/store/aaaa-hello-1.0')\""
+    )
+    machine.succeed(
+        "sudo -u fc psql -U fc -d fc -c "
+        "\"INSERT INTO builds (evaluation_id, job_name, drv_path, status, system, build_output_path) "
+        f"VALUES ('{eval_id}', 'world', '/nix/store/fake-world.drv', 'succeeded', 'x86_64-linux', '/nix/store/bbbb-world-2.0')\""
+    )
+    # A failed build should not appear in the tarball
+    machine.succeed(
+        "sudo -u fc psql -U fc -d fc -c "
+        "\"INSERT INTO builds (evaluation_id, job_name, drv_path, status, system) "
+        f"VALUES ('{eval_id}', 'broken', '/nix/store/fake-broken.drv', 'failed', 'x86_64-linux')\""
+    )
+
+    # Create a channel for the project and jobset via the API
+    channel_id = machine.succeed(
+        "curl -sf -X POST http://127.0.0.1:3000/api/v1/channels "
+        f"{auth_header} "
+        "-H 'Content-Type: application/json' "
+        f"-d '{{\"project_id\": \"{project_id}\", \"name\": \"nixos-unstable\", \"jobset_id\": \"{jobset_id}\"}}' "
+        "| jq -r .id"
+    ).strip()
+
+    with subtest("Channel without evaluation returns 404 for tarball"):
+        # The channel auto-promotes on create if eval exists, so check if it already has one
+        ch = json.loads(machine.succeed(
+            f"curl -sf http://127.0.0.1:3000/api/v1/channels/{channel_id}"
+        ))
+        if ch["current_evaluation_id"] is None:
+            code = machine.succeed(
+                f"curl -s -o /dev/null -w '%{{http_code}}' "
+                f"http://127.0.0.1:3000/api/v1/channels/{channel_id}/nixexprs.tar.xz"
+            )
+            assert code.strip() == "404", f"Expected 404 for no-eval channel, got {code.strip()}"
+
+    # Promote the SQL-inserted evaluation to the channel
+    machine.succeed(
+        f"curl -sf -X POST http://127.0.0.1:3000/api/v1/channels/{channel_id}/promote/{eval_id} "
+        f"{auth_header}"
+    )
+
+    with subtest("Channel has current_evaluation_id after promotion"):
+        result = machine.succeed(
+            f"curl -sf http://127.0.0.1:3000/api/v1/channels/{channel_id}"
+        )
+        ch = json.loads(result)
+        assert ch["current_evaluation_id"] == eval_id, \
+            f"Expected current_evaluation_id={eval_id}, got {ch['current_evaluation_id']}"
+
+    with subtest("nixexprs.tar.xz returns 200 with correct content-type"):
+        headers = machine.succeed(
+            "curl -sf -D - -o /tmp/nixexprs.tar.xz "
+            f"http://127.0.0.1:3000/api/v1/channels/{channel_id}/nixexprs.tar.xz"
+        )
+        assert "application/x-xz" in headers, \
+            f"Expected application/x-xz content-type, got: {headers}"
+
+    with subtest("Tarball is valid xz and contains default.nix"):
+        listing = machine.succeed("xz -d < /tmp/nixexprs.tar.xz | tar tf -")
+        assert "default.nix" in listing, \
+            f"Expected default.nix in tarball, got: {listing}"
+
+    with subtest("default.nix contains succeeded builds"):
+        machine.succeed("xz -d < /tmp/nixexprs.tar.xz | tar xf - -C /tmp")
+        content = machine.succeed("cat /tmp/default.nix")
+        assert "hello" in content, "Expected 'hello' job in default.nix"
+        assert "world" in content, "Expected 'world' job in default.nix"
+        assert "/nix/store/aaaa-hello-1.0" in content, \
+            "Expected hello output path in default.nix"
+        assert "/nix/store/bbbb-world-2.0" in content, \
+            "Expected world output path in default.nix"
+
+    with subtest("default.nix excludes failed builds"):
+        content = machine.succeed("cat /tmp/default.nix")
+        assert "broken" not in content, \
+            "Failed build 'broken' should not appear in default.nix"
+
+    with subtest("default.nix has mkFakeDerivation structure"):
+        content = machine.succeed("cat /tmp/default.nix")
+        assert "mkFakeDerivation" in content, \
+            "Expected mkFakeDerivation helper in default.nix"
+        assert "builtin:fetchurl" in content, \
+            "Expected builtin:fetchurl in mkFakeDerivation"
+
+    with subtest("Nonexistent channel returns 404 for tarball"):
+        code = machine.succeed(
+            "curl -s -o /dev/null -w '%{http_code}' "
+            "http://127.0.0.1:3000/api/v1/channels/00000000-0000-0000-0000-000000000000/nixexprs.tar.xz"
+        )
+        assert code.strip() == "404", f"Expected 404 for nonexistent channel, got {code.strip()}"
+
+    # Cleanup: delete the test project via the API
+    machine.succeed(
+        f"curl -sf -X DELETE http://127.0.0.1:3000/api/v1/projects/{project_id} {auth_header}"
+    )
+  '';
+}
diff --git a/nix/tests/features.nix b/nix/tests/features.nix
index caaa6f6..a719625 100644
--- a/nix/tests/features.nix
+++ b/nix/tests/features.nix
@@ -46,7 +46,7 @@ pkgs.testers.nixosTest {
     )
     ro_header = f"-H 'Authorization: Bearer {ro_token}'"
 
-    # Structured logging ----
+    # Structured logging
     with subtest("Server produces structured log output"):
         # The server should log via tracing with the configured format
         result = machine.succeed("journalctl -u fc-server --no-pager -n 50 2>&1")
@@ -54,7 +54,7 @@ pkgs.testers.nixosTest {
         assert "INFO" in result or "info" in result, \
             "Expected structured log lines with INFO level in journalctl output"
 
-    # Static CSS serving ----
+    # Static CSS serving
     with subtest("Static CSS endpoint returns 200 with correct content type"):
         code = machine.succeed(
             "curl -s -o /dev/null -w '%{http_code}' http://127.0.0.1:3000/static/style.css"
diff --git a/nix/tests/gc-pinning.nix b/nix/tests/gc-pinning.nix
new file mode 100644
index 0000000..7cd682d
--- /dev/null
+++ b/nix/tests/gc-pinning.nix
@@ -0,0 +1,165 @@
+{
+  self,
+  pkgs,
+  lib,
+}: let
+  inherit (lib.modules) mkForce;
+in
+  pkgs.testers.nixosTest {
+    name = "fc-gc-pinning";
+
+    nodes.machine = {
+      imports = [
+        self.nixosModules.fc-ci
+        ../vm-common.nix
+      ];
+      _module.args.self = self;
+
+      services.fc-ci.settings.gc = {
+        enabled = mkForce true;
+        gc_roots_dir = "/var/lib/fc/gc-roots";
+        cleanup_interval = 9999;
+        max_age_days = 1;
+      };
+    };
+
+    testScript = ''
+      import hashlib
+      import json
+
+      machine.start()
+      machine.wait_for_unit("postgresql.service")
+      machine.wait_until_succeeds("sudo -u fc psql -U fc -d fc -c 'SELECT 1'", timeout=30)
+      machine.wait_for_unit("fc-server.service")
+      machine.wait_until_succeeds("curl -sf http://127.0.0.1:3000/health", timeout=30)
+
+      api_token = "fc_testkey123"
+      api_hash = hashlib.sha256(api_token.encode()).hexdigest()
+      machine.succeed(
+          f"sudo -u fc psql -U fc -d fc -c \"INSERT INTO api_keys (name, key_hash, role) VALUES ('test', '{api_hash}', 'admin')\""
+      )
+      auth_header = f"-H 'Authorization: Bearer {api_token}'"
+
+      ro_token = "fc_readonly_key"
+      ro_hash = hashlib.sha256(ro_token.encode()).hexdigest()
+      machine.succeed(
+          f"sudo -u fc psql -U fc -d fc -c \"INSERT INTO api_keys (name, key_hash, role) VALUES ('readonly', '{ro_hash}', 'read-only')\""
+      )
+      ro_header = f"-H 'Authorization: Bearer {ro_token}'"
+
+      # Create the project that the jobset below is attached to
+      project_id = machine.succeed(
+          "curl -sf -X POST http://127.0.0.1:3000/api/v1/projects "
+          f"{auth_header} "
+          "-H 'Content-Type: application/json' "
+          "-d '{\"name\": \"gc-pin-test\", \"repository_url\": \"https://github.com/test/gc\"}' "
+          "| jq -r .id"
+      ).strip()
+
+      with subtest("Jobset has default keep_nr of 3"):
+          result = machine.succeed(
+              f"curl -sf -X POST 'http://127.0.0.1:3000/api/v1/projects/{project_id}/jobsets' "
+              f"{auth_header} "
+              "-H 'Content-Type: application/json' "
+              "-d '{\"name\": \"default\", \"nix_expression\": \"packages\"}' "
+              "| jq -r .keep_nr"
+          )
+          assert result.strip() == "3", f"Expected default keep_nr=3, got {result.strip()}"
+
+      with subtest("keep_nr persists in database"):
+          machine.succeed(
+              "sudo -u fc psql -U fc -d fc -c "
+              "\"UPDATE jobsets SET keep_nr = 7 WHERE name = 'default'\""
+          )
+          result = machine.succeed(
+              "sudo -u fc psql -U fc -d fc -tA -c "
+              "\"SELECT keep_nr FROM jobsets WHERE name = 'default'\""
+          )
+          assert result.strip() == "7", f"Expected keep_nr=7, got {result.strip()}"
+
+      with subtest("keep_nr visible in active_jobsets view"):
+          result = machine.succeed(
+              "sudo -u fc psql -U fc -d fc -tA -c "
+              "\"SELECT keep_nr FROM active_jobsets WHERE name = 'default' LIMIT 1\""
+          )
+          assert result.strip() == "7", f"Expected keep_nr=7 in view, got {result.strip()}"
+
+      # Look up the jobset id, then insert an evaluation and a build via SQL for the keep flag tests
+      jobset_id = machine.succeed(
+          "sudo -u fc psql -U fc -d fc -tA -c "
+          f"\"SELECT id FROM jobsets WHERE project_id = '{project_id}' AND name = 'default'\""
+      ).strip()
+
+      eval_id = machine.succeed(
+          "sudo -u fc psql -U fc -d fc -tA -c "
+          f"\"INSERT INTO evaluations (jobset_id, commit_hash, status) VALUES ('{jobset_id}', 'abc123', 'completed') RETURNING id\" | head -1"
+      ).strip()
+
+      build_id = machine.succeed(
+          "sudo -u fc psql -U fc -d fc -tA -c "
+          f"\"INSERT INTO builds (evaluation_id, job_name, drv_path, status, system) "
+          f"VALUES ('{eval_id}', 'hello', '/nix/store/fake.drv', 'succeeded', 'x86_64-linux') RETURNING id\" | head -1"
+      ).strip()
+
+      with subtest("Build starts with keep=false"):
+          result = machine.succeed(
+              f"curl -sf http://127.0.0.1:3000/api/v1/builds/{build_id} | jq -r .keep"
+          )
+          assert result.strip() == "false", f"Expected keep=false, got {result.strip()}"
+
+      with subtest("PUT /builds/id/keep/true sets keep flag"):
+          code = machine.succeed(
+              "curl -s -o /dev/null -w '%{http_code}' "
+              f"-X PUT http://127.0.0.1:3000/api/v1/builds/{build_id}/keep/true "
+              f"{auth_header}"
+          )
+          assert code.strip() == "200", f"Expected 200, got {code.strip()}"
+
+          result = machine.succeed(
+              f"curl -sf http://127.0.0.1:3000/api/v1/builds/{build_id} | jq -r .keep"
+          )
+          assert result.strip() == "true", f"Expected keep=true, got {result.strip()}"
+
+      with subtest("PUT /builds/id/keep/false clears keep flag"):
+          machine.succeed(
+              f"curl -sf -X PUT http://127.0.0.1:3000/api/v1/builds/{build_id}/keep/false "
+              f"{auth_header}"
+          )
+          result = machine.succeed(
+              f"curl -sf http://127.0.0.1:3000/api/v1/builds/{build_id} | jq -r .keep"
+          )
+          assert result.strip() == "false", f"Expected keep=false, got {result.strip()}"
+
+      with subtest("Read-only key cannot set keep flag"):
+          code = machine.succeed(
+              "curl -s -o /dev/null -w '%{http_code}' "
+              f"-X PUT http://127.0.0.1:3000/api/v1/builds/{build_id}/keep/true "
+              f"{ro_header}"
+          )
+          assert code.strip() == "403", f"Expected 403, got {code.strip()}"
+
+      with subtest("keep=true visible in API response"):
+          machine.succeed(
+              f"curl -sf -X PUT http://127.0.0.1:3000/api/v1/builds/{build_id}/keep/true "
+              f"{auth_header}"
+          )
+          result = machine.succeed(
+              f"curl -sf http://127.0.0.1:3000/api/v1/builds/{build_id}"
+          )
+          build_json = json.loads(result)
+          assert build_json["keep"] is True, f"Expected keep=true in JSON, got {build_json.get('keep')}"
+
+      with subtest("Nonexistent build returns 404 for keep"):
+          code = machine.succeed(
+              "curl -s -o /dev/null -w '%{http_code}' "
+              "-X PUT http://127.0.0.1:3000/api/v1/builds/00000000-0000-0000-0000-000000000000/keep/true "
+              f"{auth_header}"
+          )
+          assert code.strip() == "404", f"Expected 404, got {code.strip()}"
+
+      # Cleanup: delete the test project via the API
+      machine.succeed(
+          f"curl -sf -X DELETE http://127.0.0.1:3000/api/v1/projects/{project_id} {auth_header}"
+      )
+    '';
+  }
diff --git a/nix/tests/machine-health.nix b/nix/tests/machine-health.nix
new file mode 100644
index 0000000..41d9dd6
--- /dev/null
+++ b/nix/tests/machine-health.nix
@@ -0,0 +1,204 @@
+{
+  pkgs,
+  self,
+}:
+pkgs.testers.nixosTest {
+  name = "fc-machine-health";
+
+  nodes.machine = {
+    imports = [
+      self.nixosModules.fc-ci
+      ../vm-common.nix
+    ];
+    _module.args.self = self;
+  };
+
+  testScript = ''
+    import hashlib
+    import json
+
+    machine.start()
+    machine.wait_for_unit("postgresql.service")
+    machine.wait_until_succeeds("sudo -u fc psql -U fc -d fc -c 'SELECT 1'", timeout=30)
+    machine.wait_for_unit("fc-server.service")
+    machine.wait_until_succeeds("curl -sf http://127.0.0.1:3000/health", timeout=30)
+
+    api_token = "fc_testkey123"
+    api_hash = hashlib.sha256(api_token.encode()).hexdigest()
+    machine.succeed(
+        f"sudo -u fc psql -U fc -d fc -c \"INSERT INTO api_keys (name, key_hash, role) VALUES ('test', '{api_hash}', 'admin')\""
+    )
+    auth_header = f"-H 'Authorization: Bearer {api_token}'"
+
+    # Create a builder via the admin API and keep the parsed JSON response
+    builder_json = machine.succeed(
+        "curl -sf -X POST http://127.0.0.1:3000/api/v1/admin/builders "
+        f"{auth_header} "
+        "-H 'Content-Type: application/json' "
+        "-d '{\"name\": \"test-builder\", \"ssh_uri\": \"ssh://builder@host\", \"systems\": [\"x86_64-linux\"]}'"
+    )
+    builder = json.loads(builder_json)
+    builder_id = builder["id"]
+
+    with subtest("New builder starts with zero failures"):
+        assert builder["consecutive_failures"] == 0, \
+            f"Expected 0 failures, got {builder['consecutive_failures']}"
+        assert builder["disabled_until"] is None, \
+            f"Expected disabled_until=null, got {builder['disabled_until']}"
+        assert builder["last_failure"] is None, \
+            f"Expected last_failure=null, got {builder['last_failure']}"
+
+    with subtest("Recording failure increments consecutive_failures"):
+        machine.succeed(
+            "sudo -u fc psql -U fc -d fc -c \""
+            "UPDATE remote_builders SET "
+            "consecutive_failures = LEAST(consecutive_failures + 1, 4), "
+            "last_failure = NOW(), "
+            "disabled_until = NOW() + interval '60 seconds' "
+            f"WHERE id = '{builder_id}'\""
+        )
+        result = machine.succeed(
+            f"curl -sf http://127.0.0.1:3000/api/v1/admin/builders/{builder_id}"
+        )
+        b = json.loads(result)
+        assert b["consecutive_failures"] == 1, \
+            f"Expected 1 failure, got {b['consecutive_failures']}"
+        assert b["disabled_until"] is not None, \
+            "Expected disabled_until to be set"
+        assert b["last_failure"] is not None, \
+            "Expected last_failure to be set"
+
+    with subtest("Failures cap at 4"):
+        machine.succeed(
+            "sudo -u fc psql -U fc -d fc -c \""
+            f"UPDATE remote_builders SET consecutive_failures = 10 WHERE id = '{builder_id}'\""
+        )
+        # Simulate record_failure SQL (intended to mirror the repo code; keep in sync)
+        machine.succeed(
+            "sudo -u fc psql -U fc -d fc -c \""
+            "UPDATE remote_builders SET "
+            "consecutive_failures = LEAST(consecutive_failures + 1, 4), "
+            "last_failure = NOW(), "
+            "disabled_until = NOW() + make_interval(secs => 60.0 * power(3, LEAST(consecutive_failures + 1, 4) - 1)) "
+            f"WHERE id = '{builder_id}'\""
+        )
+        result = machine.succeed(
+            "sudo -u fc psql -U fc -d fc -tA -c "
+            f"\"SELECT consecutive_failures FROM remote_builders WHERE id = '{builder_id}'\""
+        )
+        assert result.strip() == "4", f"Expected failures capped at 4, got {result.strip()}"
+
+    with subtest("Disabled builder excluded from find_for_system"):
+        # Set disabled_until one hour in the future
+        machine.succeed(
+            "sudo -u fc psql -U fc -d fc -c \""
+            "UPDATE remote_builders SET disabled_until = NOW() + interval '1 hour' "
+            f"WHERE id = '{builder_id}'\""
+        )
+        result = machine.succeed(
+            "sudo -u fc psql -U fc -d fc -tA -c "
+            "\"SELECT count(*) FROM remote_builders "
+            "WHERE enabled = true "
+            "AND 'x86_64-linux' = ANY(systems) "
+            "AND (disabled_until IS NULL OR disabled_until < NOW())\""
+        )
+        assert result.strip() == "0", \
+            f"Expected disabled builder excluded, got count={result.strip()}"
+
+    with subtest("Non-disabled builder included in find_for_system"):
+        # Clear disabled_until so the builder matches the selection query again
+        machine.succeed(
+            "sudo -u fc psql -U fc -d fc -c \""
+            f"UPDATE remote_builders SET disabled_until = NULL WHERE id = '{builder_id}'\""
+        )
+        result = machine.succeed(
+            "sudo -u fc psql -U fc -d fc -tA -c "
+            "\"SELECT count(*) FROM remote_builders "
+            "WHERE enabled = true "
+            "AND 'x86_64-linux' = ANY(systems) "
+            "AND (disabled_until IS NULL OR disabled_until < NOW())\""
+        )
+        assert result.strip() == "1", \
+            f"Expected non-disabled builder included, got count={result.strip()}"
+
+    with subtest("Recording success resets health state"):
+        # First put the builder into a failed, temporarily disabled state
+        machine.succeed(
+            "sudo -u fc psql -U fc -d fc -c \""
+            "UPDATE remote_builders SET "
+            "consecutive_failures = 3, "
+            "disabled_until = NOW() + interval '1 hour', "
+            "last_failure = NOW() "
+            f"WHERE id = '{builder_id}'\""
+        )
+        # Simulate record_success (intended to mirror the repo code; keep in sync)
+        machine.succeed(
+            "sudo -u fc psql -U fc -d fc -c \""
+            "UPDATE remote_builders SET "
+            "consecutive_failures = 0, "
+            "disabled_until = NULL "
+            f"WHERE id = '{builder_id}'\""
+        )
+        result = machine.succeed(
+            f"curl -sf http://127.0.0.1:3000/api/v1/admin/builders/{builder_id}"
+        )
+        b = json.loads(result)
+        assert b["consecutive_failures"] == 0, \
+            f"Expected 0 failures after success, got {b['consecutive_failures']}"
+        assert b["disabled_until"] is None, \
+            f"Expected disabled_until=null after success, got {b['disabled_until']}"
+
+    with subtest("Health fields visible in admin API list"):
+        result = machine.succeed(
+            f"curl -sf http://127.0.0.1:3000/api/v1/admin/builders {auth_header}"
+        )
+        builders = json.loads(result)
+        assert len(builders) >= 1, "Expected at least one builder"
+        b = builders[0]
+        assert "consecutive_failures" in b, "Missing consecutive_failures in API response"
+        assert "disabled_until" in b, "Missing disabled_until in API response"
+        assert "last_failure" in b, "Missing last_failure in API response"
+
+    with subtest("Exponential backoff increases with failures"):
+        # Reset, then record 1st failure: expect ~60s backoff (60 * 3^0)
+        machine.succeed(
+            "sudo -u fc psql -U fc -d fc -c \""
+            f"UPDATE remote_builders SET consecutive_failures = 0, disabled_until = NULL WHERE id = '{builder_id}'\""
+        )
+        machine.succeed(
+            "sudo -u fc psql -U fc -d fc -c \""
+            "UPDATE remote_builders SET "
+            "consecutive_failures = LEAST(consecutive_failures + 1, 4), "
+            "last_failure = NOW(), "
+            "disabled_until = NOW() + make_interval(secs => 60.0 * power(3, LEAST(consecutive_failures + 1, 4) - 1)) "
+            f"WHERE id = '{builder_id}'\""
+        )
+        delta1 = machine.succeed(
+            "sudo -u fc psql -U fc -d fc -tA -c "
+            f"\"SELECT EXTRACT(EPOCH FROM (disabled_until - last_failure))::int FROM remote_builders WHERE id = '{builder_id}'\""
+        )
+        d1 = int(delta1.strip())
+        assert 55 <= d1 <= 65, f"1st failure backoff expected ~60s, got {d1}s"
+
+        # Record 2nd failure: expect ~180s backoff (60 * 3^1)
+        machine.succeed(
+            "sudo -u fc psql -U fc -d fc -c \""
+            "UPDATE remote_builders SET "
+            "consecutive_failures = LEAST(consecutive_failures + 1, 4), "
+            "last_failure = NOW(), "
+            "disabled_until = NOW() + make_interval(secs => 60.0 * power(3, LEAST(consecutive_failures + 1, 4) - 1)) "
+            f"WHERE id = '{builder_id}'\""
+        )
+        delta2 = machine.succeed(
+            "sudo -u fc psql -U fc -d fc -tA -c "
+            f"\"SELECT EXTRACT(EPOCH FROM (disabled_until - last_failure))::int FROM remote_builders WHERE id = '{builder_id}'\""
+        )
+        d2 = int(delta2.strip())
+        assert 175 <= d2 <= 185, f"2nd failure backoff expected ~180s, got {d2}s"
+
+    # Cleanup: delete the test builder via the admin API
+    machine.succeed(
+        f"curl -sf -X DELETE http://127.0.0.1:3000/api/v1/admin/builders/{builder_id} {auth_header}"
+    )
+  '';
+}